Commit 781f69d06feea0aa94e5fd4a873e89070d123a17 - natsort

Merge branch 'new-upstream-release' into 'master' New upstream release See merge request debian/natsort!4 Agustin Henze 5 years ago

72 changed file(s) with 5710 addition(s) and 3600 deletion(s). Raw diff Collapse all Expand all

-5

.coveragerc less more

14	14
15	15	ignore_errors = True
16	16
17		# Files to not perform coverage on
18		omit =
19		natsort/__init__.*
20		natsort/py23compat.*
21		natsort/_version.*

-0

.gitignore less more

12	12	sdist
13	13	develop-eggs
14	14	.installed.cfg
	15	.python-version
15	16
16	17	# We are using MANIFEST.in instead
17	18	MANIFEST

24	25	.coverage
25	26	.tox
26	27	.cache
	28	.pytest_cache
27	29	.pytest
	30	.envrc
28	31
29	32	#Translations
30	33	*.mo
31	34
32	35	#Mr Developer
33	36	.mr.developer.cfg
	37
	38	# PyCharm
	39	.idea

-36

~~.hgignore~~ less more

0		syntax: glob
1
2		*.py[co]
3
4		# Packages
5		*.egg
6		*.eggs
7		*.egg-info
8		dist
9		build
10		eggs
11		parts
12		bin
13		var
14		sdist
15		develop-eggs
16		.installed.cfg
17
18		# We are using MANIFEST.in instead
19		MANIFEST
20
21		# Installer logs
22		pip-log.txt
23
24		# Unit test / coverage reports
25		.hypothesis
26		.coverage
27		.tox
28		.cache
29		.pytest
30
31		#Translations
32		*.mo
33
34		#Mr Developer
35		.mr.developer.cfg

+50

-25

.travis.yml less more

0	0	language: python
1		python:
2		- 2.6
3		- 2.7
4		- 3.2
5		- 3.3
6		- 3.4
7		env:
8		- WITH_OPTIONS=true
9		- WITH_OPTIONS=false
10		before_install:
11		- sudo apt-get update
12		- sudo locale-gen de_DE.UTF-8
13		- sudo apt-get install bc
	1	matrix:
	2	include:
	3	- python: "2.7"
	4	dist: trusty
	5	sudo: false
	6	env: WITH_EXTRAS=""
	7	- python: "2.7"
	8	dist: trusty
	9	sudo: false
	10	env: WITH_EXTRAS="fast,icu"
	11	addons:
	12	apt:
	13	packages:
	14	- libicu-dev
	15	- language-pack-de
	16	- language-pack-en
	17	- python: "3.4"
	18	dist: trusty
	19	sudo: false
	20	env: WITH_EXTRAS=""
	21	- python: "3.5"
	22	dist: trusty
	23	sudo: false
	24	env: WITH_EXTRAS=""
	25	- python: "3.6"
	26	dist: trusty
	27	sudo: false
	28	env: WITH_EXTRAS=""
	29	- python: "3.6"
	30	dist: trusty
	31	sudo: false
	32	env: WITH_EXTRAS="fast,icu"
	33	addons:
	34	apt:
	35	packages:
	36	- libicu-dev
	37	- language-pack-de
	38	- language-pack-en
	39	- python: "3.7"
	40	dist: xenial
	41	sudo: true
	42	env: WITH_EXTRAS=""
	43
14	44	install:
15	45	- pip install -U pip
16		- if [[ $WITH_OPTIONS == true ]]; then sudo apt-get install libicu-dev; fi
17		- if [[ $WITH_OPTIONS == true ]]; then pip install fastnumbers; fi
18		- if [[ $WITH_OPTIONS == true ]]; then pip install PyICU; fi
19		- if [[ 1 -eq $(echo "$TRAVIS_PYTHON_VERSION < 3.4" \| bc -l) ]]; then pip install pathlib; fi
20		- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install argparse; fi
21		- if [[ $(echo "$TRAVIS_PYTHON_VERSION < 3.3" \| bc -l) ]]; then pip install mock; fi
22		- pip install pytest-cov pytest-flakes pytest-pep8 hypothesis
23		- pip install coveralls
	46	- pip install tox-travis codacy-coverage codecov
	47
24	48	script:
25		- python -m pytest --cov natsort --flakes --pep8
26		- python -m pytest --doctest-modules natsort
27		- python -m pytest README.rst docs/source/intro.rst docs/source/examples.rst
	49	- tox
	50
28	51	after_success:
29		coveralls
	52	- coverage xml
	53	- python-codacy-coverage -r coverage.xml
	54	- codecov

+46

-0

CODE_OF_CONDUCT.md less more

	0	# Contributor Covenant Code of Conduct
	1
	2	## Our Pledge
	3
	4	In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
	5
	6	## Our Standards
	7
	8	Examples of behavior that contributes to creating a positive environment include:
	9
	10	* Using welcoming and inclusive language
	11	* Being respectful of differing viewpoints and experiences
	12	* Gracefully accepting constructive criticism
	13	* Focusing on what is best for the community
	14	* Showing empathy towards other community members
	15
	16	Examples of unacceptable behavior by participants include:
	17
	18	* The use of sexualized language or imagery and unwelcome sexual attention or advances
	19	* Trolling, insulting/derogatory comments, and personal or political attacks
	20	* Public or private harassment
	21	* Publishing others' private information, such as a physical or electronic address, without explicit permission
	22	* Other conduct which could reasonably be considered inappropriate in a professional setting
	23
	24	## Our Responsibilities
	25
	26	Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
	27
	28	Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
	29
	30	## Scope
	31
	32	This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
	33
	34	## Enforcement
	35
	36	Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at drtuba78@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
	37
	38	Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
	39
	40	## Attribution
	41
	42	This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
	43
	44	[homepage]: http://contributor-covenant.org
	45	[version]: http://contributor-covenant.org/version/1/4/

+41

-0

CONTRIBUTING.md less more

	0	# Contributing
	1
	2	If you have an idea for how to improve `natsort`, please contribute! It can
	3	be as simple as a bug fix or documentation update, or as complicated as a more
	4	robust algorithm.
	5
	6	I do not have strong opinions on how one should contribute, so
	7	I have copy/pasted some text verbatim from the
	8	[Contributor's Guide](http://docs.python-requests.org/en/latest/dev/contributing/) section of
	9	[Kenneth Reitz's](http://docs.python-requests.org/en/latest/dev/contributing/)
	10	excellent [requests](https://github.com/kennethreitz/requests) library in
	11	lieu of coming up with my own.
	12
	13	> ### Steps for Submitting Code
	14
	15	> When contributing code, you'll want to follow this checklist:
	16
	17	> - Fork the repository on GitHub.
	18	> - Run the tests to confirm they all pass on your system.
	19	If they don't, you'll need to investigate why they fail.
	20	If you're unable to diagnose this yourself,
	21	raise it as a bug report.
	22	> - Write tests that demonstrate your bug or feature. Ensure that they fail.
	23	> - Make your change.
	24	> - Run the entire test suite again, confirming that all tests pass including the
	25	ones you just added.
	26	> - Send a GitHub Pull Request to the main repository's master branch.
	27	GitHub Pull Requests are the expected method of code collaboration on this project.
	28
	29	> ### Documentation Contributions
	30	> Documentation improvements are always welcome! The documentation files live in the
	31	docs/ directory of the codebase. They're written in
	32	[reStructuredText](http://docutils.sourceforge.net/rst.html), and use
	33	[Sphinx](http://sphinx-doc.org/index.html)
	34	to generate the full suite of documentation.
	35
	36	> When contributing documentation, please do your best to follow the style of the
	37	documentation files. This means a soft-limit of 79 characters wide in your text
	38	files and a semi-formal, yet friendly and approachable, prose style.
	39
	40	> When presenting Python code, use single-quoted strings ('hello' instead of "hello").

-0

ISSUE_TEMPLATE.md less more

	0	## Minimum, Complete, Verifiable Example
	1
	2	See https://stackoverflow.com/help/mcve for explanation.
	3
	4	## Error message, Traceback, Desired behavior, Suggestion, Request, or Question

-1

LICENSE less more

0		Copyright (c) 2012-2015 Seth M. Morton
	0	Copyright (c) 2012-2018 Seth M. Morton
1	1
2	2	Permission is hereby granted, free of charge, to any person obtaining a copy of
3	3	this software and associated documentation files (the "Software"), to deal in

+12

-30

MANIFEST.in less more

0	0	include README.rst
1	1	include LICENSE
2		include natsort/natsort.py
3		include natsort/_version.py
4		include natsort/__main__.py
5		include natsort/__init__.py
6		include natsort/locale_help.py
7		include natsort/utils.py
8		include natsort/ns_enum.py
9		include natsort/unicode_numbers.py
10		include natsort/compat/__init__.py
11		include natsort/compat/py23.py
12		include natsort/compat/fake_fastnumbers.py
13		include natsort/compat/fastnumbers.py
14		include natsort/compat/locale.py
15		include natsort/compat/pathlib.py
16		include natsort/compat/pathlib.py
17		include test_natsort/profile_natsorted.py
18		include test_natsort/stress_natsort.py
19		include test_natsort/slow_splitters.py
20		include test_natsort/test_natsort.py
21		include test_natsort/test_locale_help.py
22		include test_natsort/test_fake_fastnumbers.py
23		include test_natsort/test_main.py
24		include test_natsort/test_utils.py
25		include test_natsort/test_unicode_numbers.py
26		include test_natsort/compat/__init__.py
27		include test_natsort/compat/hypothesis.py
28		include test_natsort/compat/locale.py
29		include test_natsort/compat/mock.py
	2	include *.md
	3	include *.sh
	4	include Pipfile
30	5	include setup.py
31	6	include setup.cfg
32		prune natsort/__pycache__
33		graft docs/source
	7	include tox.ini
	8	include .travis.yml
	9	include .coveragerc
	10	include .gitignore
	11	include .bumpversion.cfg
	12	graft docs
	13	graft natsort
	14	graft test_natsort
	15	global-exclude *.py[cod] __pycache__ *.so

+13

-0

Pipfile less more

	0	[dev-packages]
	1	coverage = "*"
	2	pytest = "*"
	3	pytest-cov = "*"
	4	pytest-flakes = "*"
	5	pytest-pep8 = "*"
	6	hypothesis = ">=3.8.0"
	7	astroid = "==1.5.3"
	8	pytest-faulthandler = {version = "*", platform_python_implementation = "== 'CPython'"}
	9
	10	# These packages are standard on newer python versions.
	11	pathlib = {version = "*", python_version = "< '3.4'"}
	12	mock = {version = "*", python_version = "< '3.3'"}

+314

-145

README.rst less more

0	0	natsort
1	1	=======
2	2
3		.. image:: https://travis-ci.org/SethMMorton/natsort.svg?branch=master
	3	.. image:: https://img.shields.io/pypi/v/natsort.svg
	4	:target: https://pypi.org/project/natsort/
	5
	6	.. image:: https://img.shields.io/pypi/pyversions/natsort.svg
	7	:target: https://pypi.org/project/natsort/
	8
	9	.. image:: https://img.shields.io/pypi/l/natsort.svg
	10	:target: https://github.com/SethMMorton/natsort/blob/master/LICENSE
	11
	12	.. image:: https://img.shields.io/travis/SethMMorton/natsort/master.svg?label=travis-ci
4	13	:target: https://travis-ci.org/SethMMorton/natsort
5	14
6		.. image:: https://coveralls.io/repos/SethMMorton/natsort/badge.png?branch=master
7		:target: https://coveralls.io/r/SethMMorton/natsort?branch=master
8
9		Natural sorting for python.
	15	.. image:: https://codecov.io/gh/SethMMorton/natsort/branch/master/graph/badge.svg
	16	:target: https://codecov.io/gh/SethMMorton/natsort
	17
	18	.. image:: https://api.codacy.com/project/badge/Grade/f2bf04b1fc5d4792bf546f6e497cf4b8
	19	:target: https://www.codacy.com/app/SethMMorton/natsort
	20
	21	Simple yet flexible natural sorting in Python.
10	22
11	23	- Source Code: https://github.com/SethMMorton/natsort
12		- Downloads: https://pypi.python.org/pypi/natsort
13		- Documentation: http://pythonhosted.org/natsort
14
15		Please see `Moving from older Natsort versions`_ to see if this update requires
16		you to modify your ``natsort`` calls in your code (99% of users will not).
	24	- Downloads: https://pypi.org/project/natsort/
	25	- Documentation: http://natsort.readthedocs.io/
	26
	27	- `Examples and Recipes <http://natsort.readthedocs.io/en/master/examples.html>`_
	28	- `How Does Natsort Work? <http://natsort.readthedocs.io/en/master/howitworks.html>`_
	29	- `API <http://natsort.readthedocs.io/en/master/api.html>`_
	30	- NOTE: The old documentation at pythonhosted.org has been taken down
	31	with no redirects. Please see
	32	`this post <https://opensource.stackexchange.com/q/5941/8999>`_ for an
	33	explanation as to why.
	34
	35	- `FAQ`_
	36	- `Optional Dependencies`_
	37
	38	- `fastnumbers <https://pypi.org/project/fastnumbers>`_ >= 2.0.0
	39	- `PyICU <https://pypi.org/project/PyICU>`_ >= 1.0.0
17	40
18	41	Quick Description
19	42	-----------------

24	47
25	48	.. code-block:: python
26	49
27		>>> a = ['a2', 'a9', 'a1', 'a4', 'a10']
	50	>>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
28	51	>>> sorted(a)
29		['a1', 'a10', 'a2', 'a4', 'a9']
	52	['1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '2 ft 7 in', '7 ft 6 in']
30	53
31	54	Notice that it has the order ('1', '10', '2') - this is because the list is
32	55	being sorted in lexicographical order, which sorts numbers like you would
33	56	letters (i.e. 'b', 'ba', 'c').
34	57
35		``natsort`` provides a function ``natsorted`` that helps sort lists "naturally",
36		either as real numbers (i.e. signed/unsigned floats or ints), or as versions.
	58	``natsort`` provides a function ``natsorted`` that helps sort lists
	59	"naturally" ("naturally" is rather ill-defined, but in general it means
	60	sorting based on meaning and not computer code point).
37	61	Using ``natsorted`` is simple:
38	62
39	63	.. code-block:: python
40	64
41	65	>>> from natsort import natsorted
42		>>> a = ['a2', 'a9', 'a1', 'a4', 'a10']
43		>>> natsorted(a)
44		['a1', 'a2', 'a4', 'a9', 'a10']
45
46		``natsorted`` identifies real numbers anywhere in a string and sorts them
47		naturally.
48
49		Sorting versions is handled properly by default (as of ``natsort`` version >= 4.0.0):
	66	>>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
	67	>>> natsorted(a)
	68	['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']
	69
	70	``natsorted`` identifies numbers anywhere in a string and sorts them
	71	naturally. Below are some other things you can do with ``natsort``
	72	(also see the `examples <http://natsort.readthedocs.io/en/master/examples.html>`_
	73	for a quick start guide, or the
	74	`api <http://natsort.readthedocs.io/en/master/api.html>`_ for complete details).
	75
	76	Note: ``natsorted`` is designed to be a drop-in replacement for the built-in
	77	``sorted`` function. Like ``sorted``, ``natsorted`` `does not sort in-place`.
	78	To sort a list and assign the output to the same variable, you must
	79	explicitly assign the output to a variable:
	80
	81	.. code-block:: python
	82
	83	>>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
	84	>>> natsorted(a)
	85	['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']
	86	>>> print(a) # 'a' was not sorted; "natsorted" simply returned a sorted list
	87	['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
	88	>>> a = natsorted(a) # Now 'a' will be sorted because the sorted list was assigned to 'a'
	89	>>> print(a)
	90	['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']
	91
	92	Please see `Generating a Reusable Sorting Key and Sorting In-Place`_ for
	93	an alternate way to sort in-place naturally.
	94
	95	Examples
	96	--------
	97
	98	Sorting Versions
	99	++++++++++++++++
	100
	101	This is handled properly by default (as of ``natsort`` version >= 4.0.0):
50	102
51	103	.. code-block:: python
52	104

55	107	['version-1.9', 'version-1.10', 'version-1.11', 'version-2.0']
56	108
57	109	If you need to sort release candidates, please see
58		`this useful hack <http://pythonhosted.org//natsort/examples.htm#rc-sorting>`_ .
59
60		You can also perform locale-aware sorting (or "human sorting"), where the
61		non-numeric characters are ordered based on their meaning, not on their
62		ordinal value; this can be achieved with the ``humansorted`` function:
63
64		.. code-block:: python
65
66		>>> a = ['Apple', 'Banana', 'apple', 'banana']
67		>>> natsorted(a)
68		['Apple', 'Banana', 'apple', 'banana']
	110	`this useful hack <http://natsort.readthedocs.io/en/master/examples.html#rc-sorting>`_.
	111
	112	Sorting by Real Numbers (i.e. Signed Floats)
	113	++++++++++++++++++++++++++++++++++++++++++++
	114
	115	This is useful in scientific data analysis and was
	116	the default behavior of ``natsorted`` for ``natsort``
	117	version < 4.0.0. Use the ``realsorted`` function:
	118
	119	.. code-block:: python
	120
	121	>>> from natsort import realsorted, ns
	122	>>> # Note that when interpreting as signed floats, the below numbers are
	123	>>> # +5.10, -3.00, +5.30, +2.00
	124	>>> a = ['position5.10.data', 'position-3.data', 'position5.3.data', 'position2.data']
	125	>>> natsorted(a)
	126	['position2.data', 'position5.3.data', 'position5.10.data', 'position-3.data']
	127	>>> natsorted(a, alg=ns.REAL)
	128	['position-3.data', 'position2.data', 'position5.10.data', 'position5.3.data']
	129	>>> realsorted(a) # shortcut for natsorted with alg=ns.REAL
	130	['position-3.data', 'position2.data', 'position5.10.data', 'position5.3.data']
	131
	132	Locale-Aware Sorting (or "Human Sorting")
	133	+++++++++++++++++++++++++++++++++++++++++
	134
	135	This is where the non-numeric characters are also ordered based on their meaning,
	136	not on their ordinal value, and a locale-dependent thousands separator and decimal
	137	separator is accounted for in the number.
	138	This can be achieved with the ``humansorted`` function:
	139
	140	.. code-block:: python
	141
	142	>>> a = ['Apple', 'apple15', 'Banana', 'apple14,689', 'banana']
	143	>>> natsorted(a)
	144	['Apple', 'Banana', 'apple14,689', 'apple15', 'banana']
69	145	>>> import locale
70	146	>>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
71	147	'en_US.UTF-8'
	148	>>> natsorted(a, alg=ns.LOCALE)
	149	['apple15', 'apple14,689', 'Apple', 'banana', 'Banana']
72	150	>>> from natsort import humansorted
73		>>> humansorted(a)
74		['apple', 'Apple', 'banana', 'Banana']
	151	>>> humansorted(a) # shortcut for natsorted with alg=ns.LOCALE
	152	['apple15', 'apple14,689', 'Apple', 'banana', 'Banana']
75	153
76	154	You may find you need to explicitly set the locale to get this to work
77	155	(as shown in the example).
78		Please see the `following caveat <http://pythonhosted.org//natsort/examples.html#bug-note>`_
79		and the `Optional Dependencies`_ section
80		below before using the ``humansorted`` function, especially if you are on a
81		BSD-based system (like Mac OS X).
82
83		You can sort signed floats (i.e. real numbers) using the ``realsorted``; this is
84		useful in scientific data analysis. This was the default behavior of ``natsorted``
85		for ``natsort`` version < 4.0.0:
86
87		.. code-block:: python
88
89		>>> from natsort import realsorted
90		>>> a = ['num5.10', 'num-3', 'num5.3', 'num2']
91		>>> natsorted(a)
92		['num2', 'num5.3', 'num5.10', 'num-3']
93		>>> realsorted(a)
94		['num-3', 'num2', 'num5.10', 'num5.3']
	156	Please see `locale issues <http://natsort.readthedocs.io/en/master/locale_issues.html>`_ and the
	157	`Optional Dependencies`_ section below before using the ``humansorted`` function.
	158
	159	Further Customizing Natsort
	160	+++++++++++++++++++++++++++
	161
	162	If you need to combine multiple algorithm modifiers (such as ``ns.REAL``,
	163	``ns.LOCALE``, and ``ns.IGNORECASE``), you can combine the options using the
	164	bitwise OR operator (``|``). For example,
	165
	166	.. code-block:: python
	167
	168	>>> a = ['Apple', 'apple15', 'Banana', 'apple14,689', 'banana']
	169	>>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE)
	170	['Apple', 'apple15', 'apple14,689', 'Banana', 'banana']
	171	>>> # The ns enum provides long and short forms for each option.
	172	>>> ns.LOCALE == ns.L
	173	True
	174	>>> # You can also customize the convenience functions, too.
	175	>>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE) == realsorted(a, alg=ns.L | ns.IC)
	176	True
	177	>>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE) == humansorted(a, alg=ns.R | ns.IC)
	178	True
	179
	180	All of the available customizations can be found in the documentation for
	181	`the ns enum <http://natsort.readthedocs.io/en/master/ns_class.html>`_.
	182
	183	You can also add your own custom transformation functions with the ``key`` argument.
	184	These can be used with ``alg`` if you wish.
	185
	186	.. code-block:: python
	187
	188	>>> a = ['apple2.50', '2.3apple']
	189	>>> natsorted(a, key=lambda x: x.replace('apple', ''), alg=ns.REAL)
	190	['2.3apple', 'apple2.50']
	191
	192	Sorting Mixed Types
	193	+++++++++++++++++++
95	194
96	195	You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types
97	196	when you sort:

104	203	>>> # On Python 2, sorted(a) would return [2.0, 6, '4.5', '5', 'a']
105	204	>>> # On Python 3, sorted(a) would raise an "unorderable types" TypeError
106	205
107		``natsort`` does not officially support the ``bytes`` type on Python 3, but
108		convenience functions are provided that help you decode to ``str`` first:
	206	Handling Bytes on Python 3
	207	++++++++++++++++++++++++++
	208
	209	``natsort`` does not officially support the ``bytes`` type on Python 3, but
	210	convenience functions are provided that help you decode to ``str`` first:
109	211
110	212	.. code-block:: python
111	213

121	223	>>> natsorted(a, key=as_utf8) == [b'a5', b'a6', b'a40', b'a56']
122	224	True
123	225
124		The natsort algorithm does other fancy things like
	226	Generating a Reusable Sorting Key and Sorting In-Place
	227	++++++++++++++++++++++++++++++++++++++++++++++++++++++
	228
	229	Under the hood, ``natsorted`` works by generating a custom sorting
	230	key using ``natsort_keygen`` and then passes that to the built-in
	231	``sorted``. You can use the ``natsort_keygen`` function yourself to
	232	generate a custom sorting key to sort in-place using the ``list.sort``
	233	method.
	234
	235	.. code-block:: python
	236
	237	>>> from natsort import natsort_keygen
	238	>>> natsort_key = natsort_keygen()
	239	>>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
	240	>>> natsorted(a) == sorted(a, key=natsort_key)
	241	True
	242	>>> a.sort(key=natsort_key)
	243	>>> a
	244	['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']
	245
	246	All of the algorithm customizations mentioned in the `Further Customizing Natsort`_
	247	section can also be applied to ``natsort_keygen`` through the alg keyword option.
	248
	249	Other Useful Things
	250	+++++++++++++++++++
125	251
126	252	- recursively descend into lists of lists
127		- control the case-sensitivity
128		- sort file paths correctly
129		- allow custom sorting keys
130		- exposes a natsort_key generator to pass to ``list.sort``
131
132		Please see the package documentation for more details, including
133		`examples and recipes <http://pythonhosted.org//natsort/examples.html>`_.
	253	- automatic unicode normalization of input data
	254	- `controlling the case-sensitivity <http://natsort.readthedocs.io/en/master/examples.html#case-sort>`_
	255	- `sorting file paths correctly <http://natsort.readthedocs.io/en/master/examples.html#path-sort>`_
	256	- `allow custom sorting keys <http://natsort.readthedocs.io/en/master/examples.html#custom-sort>`_
	257
	258	FAQ
	259	---
	260
	261	How do I debug ``natsort.natsorted()``?
	262	The best way to debug ``natsorted()`` is to generate a key using ``natsort_keygen()``
	263	with the same options being passed to ``natsorted``. One can take a look at
	264	exactly what is being done with their input using this key - it is highly recommended
	265	to `look at this issue describing how to debug <https://github.com/SethMMorton/natsort/issues/13#issuecomment-50422375>`_
	266	and also to review the
	267	`How Does Natsort Work? <http://natsort.readthedocs.io/en/master/howitworks.html>`_
	268	page for why ``natsort`` is doing that to your data.
	269
	270	If you are trying to sort custom classes and running into trouble, please take a look at
	271	https://github.com/SethMMorton/natsort/issues/60. In short,
	272	custom classes are not likely to be sorted correctly if one relies
	273	on the behavior of ``__lt__`` and the other rich comparison operators in their
	274	custom class - it is better to use a ``key`` function with ``natsort``, or
	275	use the ``natsort`` key as part of your rich comparison operator definition.
	276
	277	How does ``natsort`` work?
	278	If you don't want to read `How Does Natsort Work? <http://natsort.readthedocs.io/en/master/howitworks.html>`_,
	279	here is a quick primer.
	280
	281	``natsort`` provides a `key function <https://docs.python.org/3/howto/sorting.html#key-functions>`_
	282	that can be passed to `list.sort() <https://docs.python.org/3/library/stdtypes.html#list.sort>`_
	283	or `sorted() <https://docs.python.org/3/library/functions.html#sorted>`_ in order to
	284	modify the default sorting behavior. This key is generated on-demand with the
	285	key generator ``natsort.natsort_keygen()``. ``natsort.natsorted()`` is essentially
	286	a wrapper for the following code:
	287
	288	.. code-block:: python
	289
	290	>>> from natsort import natsort_keygen
	291	>>> natsort_key = natsort_keygen()
	292	>>> sorted(['1', '10', '2'], key=natsort_key)
	293	['1', '2', '10']
	294
	295	Users can further customize ``natsort`` sorting behavior with the ``key``
	296	and/or ``alg`` options (see details in the `Further Customizing Natsort`_
	297	section).
	298
	299	The key generated by ``natsort_keygen`` always returns a ``tuple``. It
	300	does so in the following way (some details omitted for clarity):
	301
	302	1. Assume the input is a string, and attempt to split it into numbers and
	303	non-numbers using regular expressions. Numbers are then converted into
	304	either ``int`` or ``float``.
	305	2. If the above fails because the input is not a string, assume the input
	306	is some other sequence (e.g. ``list`` or ``tuple``), and recursively
	307	apply the key to each element of the sequence.
	308	3. If the above fails because the input is not iterable, assume the input
	309	is an ``int`` or ``float``, and just return the input in a ``tuple``.
	310
	311	Because a ``tuple`` is always returned, a ``TypeError`` should not be common
	312	unless one tries to do something odd like sort an ``int`` against a ``list``.
	313
	314	``natsort`` gave me results I didn't expect, and it's a terrible library!
	315	Did you try to debug using the above advice? If so, and you still cannot figure out
	316	the error, then please `file an issue <https://github.com/SethMMorton/natsort/issues/new>`_.
134	317
135	318	Shell script
136	319	------------

141	324	Requirements
142	325	------------
143	326
144		``natsort`` requires Python version 2.7 or greater or Python 3.2 or greater.
145
146		.. _optional:
	327	``natsort`` requires Python version 2.6 or greater or Python 3.3 or greater.
	328	It may run on (but is not tested against) Python 3.2.
147	329
148	330	Optional Dependencies
149	331	---------------------
150	332
151	333	fastnumbers
152		'''''''''''
153
154		The most efficient sorting can occur if you install the
155		`fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ package (it helps
156		with the string to number conversions.) ``natsort`` will still run (efficiently)
157		without the package, but if you need to squeeze out that extra juice it is
158		recommended you include this as a dependency. ``natsort`` will not require (or
159		check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed
	334	+++++++++++
	335
	336	The most efficient sorting can occur if you install the
	337	`fastnumbers <https://pypi.org/project/fastnumbers>`_ package
	338	(version >=0.7.1); it helps with the string to number conversions.
	339	``natsort`` will still run (efficiently) without the package, but if you need
	340	to squeeze out that extra juice it is recommended you include this as a dependency.
	341	``natsort`` will not require (or check) that
	342	`fastnumbers <https://pypi.org/project/fastnumbers>`_ is installed
160	343	at installation.
161	344
162	345	PyICU
163		'''''
164
165		On BSD-based systems (this includes Mac OS X), the underlying ``locale`` library
166		can be buggy (please see http://bugs.python.org/issue23195); ``locale`` is
167		used for the ``ns.LOCALE`` option and ``humansorted`` function.. To remedy this,
168		one can
169
170		1. Use "\.ISO8859-1" locale (i.e. 'en_US.ISO8859-1') rather than "\.UTF-8"
171		locale. These locales do not suffer from as many problems as "UTF-8"
172		and thus should give expected results.
173		2. Use `PyICU <https://pypi.python.org/pypi/PyICU>`_. If
174		`PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed, ``natsort``
175		will use it under the hood; this will give more
176		reliable cross-platform results in the long run. ``natsort`` will not
177		require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_
178		is installed at installation. Please visit
179		https://github.com/SethMMorton/natsort/issues/21 for more details and
180		how to install on Mac OS X. Please note that using
181		`PyICU <https://pypi.python.org/pypi/PyICU>`_ is the only way to
182		guarantee correct results for all input on BSD-based systems, since
183		every other suggestion is a workaround.
184		3. Do nothing. As of ``natsort`` version 4.0.0, ``natsort`` is configured
185		to compensate for a broken ``locale`` library in terms of case-handling;
186		if you do not need to be able to properly handle non-ASCII characters
187		then this may be the best option for you.
188
189		Note that the above solutions should not be required for Windows or
190		Linux since in Linux-based systems and Windows systems ``locale`` should work
191		just fine.
192
193		.. _deprecate:
194
195		Moving from older Natsort versions
196		----------------------------------
197
198		- The default sorting algorithm for ``natsort`` has changed in version 4.0.0
199		from signed floats (with exponents) to unsigned integers. The motivation
200		for this change is that it will cause ``natsort`` to return results that
201		pass the "least astonishment" test for the most common use case, which is
202		sorting version numbers. If you relied on the default behavior
203		to be signed floats, add ``alg=ns.F \| ns.S`` to your
204		``natsort`` calls or switch to the new ``realsorted`` function which
205		behaves identically to the older ``natsorted`` with default values.
206		For 99% of users this change will not effect their code... it is only
207		expected that this will effect users using ``natsort`` for science and
208		engineering.
209		This will also affect the default behavior of the ``natsort`` shell script.
210		- In ``natsort`` version 4.0.0, the ``number_type``, ``signed``, ``exp``,
211		``as_path``, and ``py3_safe`` options have be removed from the (documented)
212		API in favor of the ``alg`` option and ``ns`` enum.
213		- In ``natsort`` version 4.0.0, the ``natsort_key`` function has been removed
214		from the public API.
	346	+++++
	347
	348	It is recommended that you install `PyICU <https://pypi.org/project/PyICU>`_
	349	if you wish to sort in a locale-dependent manner, see
	350	http://natsort.readthedocs.io/en/master/locale_issues.html for an explanation why.
	351
	352	Installation
	353	------------
	354
	355	Use ``pip``!
	356
	357	.. code-block:: sh
	358
	359	$ pip install natsort
	360
	361	If you want to install the `Optional Dependencies`_, you can use the
	362	`"extras" notation <https://packaging.python.org/tutorials/installing-packages/#installing-setuptools-extras>`_
	363	at installation time to install those dependencies as well - use ``fast`` for
	364	`fastnumbers <https://pypi.org/project/fastnumbers>`_ and ``icu`` for
	365	`PyICU <https://pypi.org/project/PyICU>`_.
	366
	367	.. code-block:: sh
	368
	369	# Install both optional dependencies.
	370	$ pip install natsort[fast,icu]
	371	# Install just fastnumbers
	372	$ pip install natsort[fast]
	373
	374	How to Run Tests
	375	----------------
	376
	377	Please note that ``natsort`` is NOT set-up to support ``python setup.py test``.
	378
	379	The recommended way to run tests is with `tox <https://tox.readthedocs.io/en/latest/>`_.
	380	After installing ``tox``, running tests is as simple as executing the following in the
	381	``natsort`` directory:
	382
	383	.. code-block:: sh
	384
	385	$ tox
	386
	387	``tox`` will create a virtual environment for your tests and install all the
	388	needed testing requirements for you. You can specify a particular python version
	389	with the ``-e`` flag, e.g. ``tox -e py36``.
	390
	391	If you do not wish to use ``tox``, you can install the testing dependencies and run the
	392	tests manually using `pytest <https://docs.pytest.org/en/latest/>`_ - ``natsort``
	393	contains a ``Pipfile`` for use with `pipenv <https://github.com/pypa/pipenv>`_ that
	394	makes it easy for you to install the testing dependencies:
	395
	396	.. code-block:: sh
	397
	398	$ pipenv install --skip-lock --dev
	399	$ pipenv run python -m pytest
	400
	401	Note that above I invoked ``python -m pytest`` instead of just ``pytest`` - this is because
	402	`the former puts the CWD on sys.path <https://docs.pytest.org/en/latest/usage.html#calling-pytest-through-python-m-pytest>`_.
215	403
216	404	Author
217	405	------

221	409	History
222	410	-------
223	411
224		These are the last three entries of the changelog. See the package documentation
225		for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_.
226
227		06-25-2015 v. 4.0.3
228		'''''''''''''''''''
229
230		- Fixed bad install on last release (sorry guys!).
231
232		06-24-2015 v. 4.0.2
233		'''''''''''''''''''
234
235		- Added back Python 2.6 and Python 3.2 compatibility. Unit testing is now
236		performed for these versions.
237		- Consolidated under-the-hood compatibility functionality.
238
239		06-04-2015 v. 4.0.1
240		'''''''''''''''''''
241
242		- Added support for sorting NaN by internally converting to -Infinity
243		or +Infinity
	412	Please visit the `changelog <http://natsort.readthedocs.io/en/master/changelog.html>`_.

-0

clean.sh less more

	0	#! /bin/bash
	1
	2	rm -rf build/ dist/ *.egg-info .pytest_cache/ .hypothesis/ .tox/
	3	find . -type d -name __pycache__ -delete
	4	find . -type f -name "*.pyc" -delete

+17

-0

debian/changelog less more

	0	natsort (5.3.3-1) unstable; urgency=medium
	1
	2	* [d/rules]
	3	* Remove obsolete get-orig-source target
	4	* Use pkg-info.mk instead of parsing changelog by hand
	5	* [d/control]
	6	* Remove obsolete X-Python-* fields
	7	* Update Vcs-* fields pointing to salsa
	8	* Bump Standards-Version to 4.2.0 (no changes needed)
	9	* Fix capitalization of Python on package description
	10	* Add (autopkg) test
	11	* Fix debian/watch, upstream doesn't use `v` for tags anymore
	12	* Update dh to 11 (no changes needed)
	13	* New upstream version 5.3.3
	14
	15	-- Agustin Henze <tin@debian.org> Mon, 20 Aug 2018 10:14:23 +0200
	16
0	17	natsort (4.0.3-2) unstable; urgency=medium
1	18
2	19	* Fix code example in package description (Closes: #778767)

-0

debian/patches/series less more

skip-fail-test

+10

-0

debian/patches/skip-fail-test less more

	0	--- a/test_natsort/test_input_string_transform_factory.py
	1	+++ b/test_natsort/test_input_string_transform_factory.py
	2	@@ -121,6 +121,7 @@
	3	locale.setlocale(locale.LC_ALL, str(''))
	4
	5
	6	+@pytest.mark.skip("It's always failing, reported to upstream https://github.com/SethMMorton/natsort/issues/65")
	7	def test_input_string_transform_factory_removes_thousands_separator_and_is_float_aware_with_LOCALE_and_FLOAT_example():
	8	x = '12,543,642,642.534,534,980'
	9	assert _input_string_transform_factory(ns.LOCALE \| ns.FLOAT)(x) == '12543642642.534,534980'

-2

debian/python-natsort-doc.doc-base less more

13	13	Section: Programming/Python
14	14
15	15	Format: HTML
16		Index: /usr/share/doc/python-natsort-doc/html/index.html
17		Files: /usr/share/doc/python-natsort-doc/html/*
	16	Index: /usr/share/doc/python-natsort/html/index.html
	17	Files: /usr/share/doc/python-natsort/html/*

-3

debian/tests/control less more

0	0	Tests: unittests
1	1	Depends: @,
2		python3-setuptools,
3		python-setuptools,
	2	tox,
4	3	locales-all,
5		ca-certificates,
	4	build-essential,
	5	python2-dev,
	6	python3-dev,

-2

debian/tests/unittests less more

0		python3 setup.py test
1		python setup.py test
	0	tox

-0

docs/source/api.rst less more

20	20	order_by_index.rst
21	21	ns_class.rst
22	22	bytes.rst
	23	chain.rst
	24	locale_issues.rst

+16

-0

docs/source/chain.rst less more

	0	.. default-domain:: py
	1	.. currentmodule:: natsort
	2
	3	.. _function_help:
	4
	5	Help With Creating Function Keys
	6	================================
	7
	8	If you need to create a complicated key argument to (for example)
	9	:func:`natsorted` that is actually multiple functions called one after the other,
	10	the following function can help you easily perform this action. It is
	11	used internally to :mod:`natsort`, and has been exposed publicly for
	12	the convenience of the user.
	13
	14	.. autofunction:: chain_functions
	15

+125

-27

docs/source/changelog.rst less more

2	2	Changelog
3	3	---------
4	4
	5	07-07-2018 v. 5.3.3
	6	+++++++++++++++++++
	7
	8	- Update docs with a FAQ and quick how-it-works.
	9	- Fix a StopIteration error in the testing code.
	10	- Enable Python 3.7 support in Travis-CI.
	11
	12	05-17-2018 v. 5.3.2
	13	+++++++++++++++++++
	14
	15	- Fix bug that prevented install on old versions of setuptools.
	16	- Revert layout from src/natsort/ back to natsort/ to make user
	17	testing simpler.
	18
	19	05-14-2018 v. 5.3.1
	20	+++++++++++++++++++
	21
	22	- No bugfixes or features, just infrastructure and installation updates.
	23	- Move to defining dependencies with Pipfile.
	24	- Development layout is now src/natsort/ instead of natsort/.
	25	- Add bumpversion infrastructure.
	26	- Extras can be installed by "[]" notation.
	27
	28	04-20-2018 v. 5.3.0
	29	+++++++++++++++++++
	30
	31	- Fix bug in assessing ``fastnumbers`` version at import-time.
	32	- Add ability to consider unicode-decimal numbers as numbers.
	33
	34	02-14-2018 v. 5.2.0
	35	+++++++++++++++++++
	36
	37	- Add ``ns.NUMAFTER`` to cause numbers to be placed after non-numbers.
	38	- Add ``natcmp`` function (Python 2 only).
	39
	40	11-11-2017 v. 5.1.1
	41	+++++++++++++++++++
	42
	43	- Added additional unicode number support for Python 3.7.
	44	- Added information on how to install and test.
	45
	46	08-19-2017 v. 5.1.0
	47	+++++++++++++++++++
	48
	49	- Fixed ``StopIteration`` warning on Python 3.6+.
	50	- All Unicode input is now normalized.
	51
	52	04-30-2017 v. 5.0.3
	53	+++++++++++++++++++
	54
	55	- Improved development infrastructure.
	56	- Migrated documentation to ReadTheDocs.
	57
	58	01-02-2017 v. 5.0.2
	59	+++++++++++++++++++
	60
	61	- Added additional unicode number support for Python 3.6.
	62	- Renamed several internal functions and variables to improve clarity.
	63	- Improved documentation examples.
	64	- Added a "how does it work?" section to the documentation.
	65
	66	06-04-2016 v. 5.0.1
	67	+++++++++++++++++++
	68
	69	- The ``ns`` enum attributes can now be imported from the top-level
	70	namespace.
	71	- Fixed a bug with the ``from natsort import *`` mechanism.
	72	- Fixed bug with using ``natsort`` with ``python -OO``.
	73
	74	05-08-2016 v. 5.0.0
	75	+++++++++++++++++++
	76
	77	- ``ns.LOCALE``/``humansorted`` now accounts for thousands separators.
	78	- Refactored entire codebase to be more functional (as in use functions as
	79	units). Previously, the code was rather monolithic and difficult to follow. The
	80	goal is that with the code existing in smaller units, contributing will
	81	be easier.
	82	- Deprecated ``ns.TYPESAFE`` option as it is now always on (due to a new
	83	iterator-based algorithm, the typesafe function is now cheap).
	84	- Increased speed of execution (came for free with the new functional approach
	85	because the new factory function paradigm eliminates most ``if`` branches
	86	during execution).
	87
	88	- For most cases, the code is 30-40% faster than version 4.0.4.
	89	- If using ``ns.LOCALE`` or ``humansorted``, the code is 1100% faster than
	90	version 4.0.4.
	91
	92	- Improved clarity of documentation with regard to locale-aware sorting.
	93	- Added a new ``chain_functions`` function for convenience in creating
	94	a complex user-given ``key`` from several existing functions.
	95
	96	11-01-2015 v. 4.0.4
	97	+++++++++++++++++++
	98
	99	- Improved coverage of unit tests.
	100	- Unit tests use new and improved hypothesis library.
	101	- Fixed compatibility issues with Python 3.5
	102
5	103	06-25-2015 v. 4.0.3
6		'''''''''''''''''''
	104	+++++++++++++++++++
7	105
8	106	- Fixed bad install on last release (sorry guys!).
9	107
10	108	06-24-2015 v. 4.0.2
11		'''''''''''''''''''
	109	+++++++++++++++++++
12	110
13	111	- Added back Python 2.6 and Python 3.2 compatibility. Unit testing is now
14	112	performed for these versions.
15	113	- Consolidated under-the-hood compatibility functionality.
16	114
17	115	06-04-2015 v. 4.0.1
18		'''''''''''''''''''
	116	+++++++++++++++++++
19	117
20	118	- Added support for sorting NaN by internally converting to -Infinity
21	119	or +Infinity
22	120
23	121	05-17-2015 v. 4.0.0
24		'''''''''''''''''''
	122	+++++++++++++++++++
25	123
26	124	- Made default behavior of 'natsort' search for unsigned ints,
27	125	rather than signed floats. This is a backwards-incompatible

32	130	- Greatly improved all unit tests by adding the hypothesis library.
33	131
34	132	04-06-2015 v. 3.5.6
35		'''''''''''''''''''
	133	+++++++++++++++++++
36	134
37	135	- Added 'UNGROUPLETTERS' algorithm to get the case-grouping behavior of
38	136	an ordinal sort when using 'LOCALE'.

40	138	dealing with bytes types.
41	139
42	140	04-04-2015 v. 3.5.5
43		'''''''''''''''''''
	141	+++++++++++++++++++
44	142
45	143	- Added 'realsorted' and 'index_realsorted' functions for
46	144	forward-compatibility with >= 4.0.0.
47	145	- Made explanation of when to use "TYPESAFE" more clear in the docs.
48	146
49	147	04-02-2015 v. 3.5.4
50		'''''''''''''''''''
	148	+++++++++++++++++++
51	149
52	150	- Fixed bug where a 'TypeError' was raised if a string containing a leading
53	151	number was sorted with alpha-only strings when 'LOCALE' is used.
54	152
55	153	03-26-2015 v. 3.5.3
56		'''''''''''''''''''
	154	+++++++++++++++++++
57	155
58	156	- Fixed bug where '--reverse-filter' option in shell script was not
59	157	getting checked for correctness.

62	160	- Internal improvements, including making test suite more granular.
63	161
64	162	01-13-2015 v. 3.5.2
65		'''''''''''''''''''
	163	+++++++++++++++++++
66	164
67	165	- Enhancement that will convert a 'pathlib.Path' object to a 'str' if
68	166	'ns.PATH' is enabled.
69	167
70	168	09-25-2014 v. 3.5.1
71		'''''''''''''''''''
	169	+++++++++++++++++++
72	170
73	171	- Fixed bug that caused list/tuples to fail when using 'ns.LOWERCASEFIRST'
74	172	or 'ns.IGNORECASE'.

78	176
79	177
80	178	09-02-2014 v. 3.5.0
81		'''''''''''''''''''
	179	+++++++++++++++++++
82	180
83	181	- Added the 'alg' argument to the 'natsort' functions. This argument
84	182	accepts an enum that is used to indicate the options the user wishes

96	194	- Updated shell script with locale functionality.
97	195
98	196	08-12-2014 v. 3.4.1
99		'''''''''''''''''''
	197	+++++++++++++++++++
100	198
101	199	- 'natsort' will now use the 'fastnumbers' module if it is installed. This
102	200	gives up to an extra 30% boost in speed over the previous performance

105	203	new example in the examples section.
106	204
107	205	07-19-2014 v. 3.4.0
108		'''''''''''''''''''
	206	+++++++++++++++++++
109	207
110	208	- Fixed a bug that caused user's options to the 'natsort_key' to not be
111	209	passed on to recursive calls of 'natsort_key'.

134	232	- Entire codebase is now PyFlakes and PEP8 compliant.
135	233
136	234	06-28-2014 v. 3.3.0
137		'''''''''''''''''''
	235	+++++++++++++++++++
138	236
139	237	- Added a 'versorted' method for more convenient sorting of versions.
140	238	- Updated command-line tool --number_type option with 'version' and 'ver'

149	247	- Connected natsort development to Travis-CI to help ensure quality releases.
150	248
151	249	06-20-2014 v. 3.2.1
152		'''''''''''''''''''
	250	+++++++++++++++++++
153	251
154	252	- Re-"Fixed" unorderable types issue on Python 3.x - this workaround
155	253	is for when the problem occurs in the middle of the string.
156	254
157	255	05-07-2014 v. 3.2.0
158		'''''''''''''''''''
	256	+++++++++++++++++++
159	257
160	258	- "Fixed" unorderable types issue on Python 3.x with a workaround that
161	259	attempts to replicate the Python 2.x behavior by putting all the numbers

164	262	to MANIFEST.in.
165	263
166	264	05-05-2014 v. 3.1.2
167		'''''''''''''''''''
	265	+++++++++++++++++++
168	266
169	267	- Added setup.cfg to support universal wheels.
170	268	- Added Python 3.0 and Python 3.1 as requiring the argparse module.
171	269
172	270	03-01-2014 v. 3.1.1
173		'''''''''''''''''''
	271	+++++++++++++++++++
174	272
175	273	- Added ability to sort lists of lists.
176	274	- Cleaned up import statements.
177	275
178	276	01-20-2014 v. 3.1.0
179		'''''''''''''''''''
	277	+++++++++++++++++++
180	278
181	279	- Added the ``signed`` and ``exp`` options to allow finer tuning of the sorting
182	280	- Entire codebase now works for both Python 2 and Python 3 without needing to run

196	294	to filter by.
197	295
198	296	10-01-2013 v. 3.0.2
199		'''''''''''''''''''
	297	+++++++++++++++++++
200	298
201	299	- Made float, int, and digit searching algorithms all share the same base function.
202	300	- Fixed some outdated comments.
203	301	- Made the ``__version__`` variable available when importing the module.
204	302
205	303	8-15-2013 v. 3.0.1
206		''''''''''''''''''
	304	++++++++++++++++++
207	305
208	306	- Added support for unicode strings.
209	307	- Removed extraneous ``string2int`` function.
210	308	- Fixed empty string removal function.
211	309
212	310	7-13-2013 v. 3.0.0
213		''''''''''''''''''
	311	++++++++++++++++++
214	312
215	313	- Added a ``number_type`` argument to the sorting functions to specify how
216	314	liberal to be when deciding what a number is.
217	315	- Reworked the documentation.
218	316
219	317	6-25-2013 v. 2.2.0
220		''''''''''''''''''
	318	++++++++++++++++++
221	319
222	320	- Added ``key`` attribute to ``natsorted`` and ``index_natsorted`` so that
223	321	it mimics the functionality of the built-in ``sorted``

225	323	how to get similar functionality using ``natsort_key``.
226	324
227	325	12-5-2012 v. 2.1.0
228		''''''''''''''''''
	326	++++++++++++++++++
229	327
230	328	- Reorganized package.
231	329	- Now using a platform independent shell script generator (entry_points

234	332	as well.
235	333
236	334	11-30-2012 v. 2.0.2
237		'''''''''''''''''''
	335	+++++++++++++++++++
238	336
239	337	- Added the use_2to3 option to setup.py.
240	338	- Added distribute_setup.py to the distribution.
241	339	- Added dependency to the argparse module (for python2.6).
242	340
243	341	11-21-2012 v. 2.0.1
244		'''''''''''''''''''
	342	+++++++++++++++++++
245	343
246	344	- Reorganized directory structure.
247	345	- Added tests into the natsort.py file itself.
248	346
249	347	11-16-2012, v. 2.0.0
250		''''''''''''''''''''
	348	++++++++++++++++++++
251	349
252	350	- Updated sorting algorithm to support floats (including exponentials) and
253	351	basic version number support.

+12

-19

docs/source/conf.py less more

12	12	# serve to show the default.
13	13
14	14	import os
15		import re
16
17		def current_version():
18		# Read the _version.py file for the module version number
19		VERSIONFILE = os.path.join('..', '..', 'natsort', '_version.py')
20		versionsearch = re.compile(r"^__version__ = ['\"]([^'\"]*)['\"]")
21		with open(VERSIONFILE, "rt") as fl:
22		for line in fl:
23		m = versionsearch.search(line)
24		if m:
25		return m.group(1)
26		else:
27		s = "Unable to locate version string in {0}"
28		raise RuntimeError(s.format(VERSIONFILE))
29	15
30	16	# If extensions (or modules to document with autodoc) are in another directory,
31	17	# add these directories to sys.path here. If the directory is relative to the

44	30	'sphinx.ext.autodoc',
45	31	'sphinx.ext.autosummary',
46	32	'sphinx.ext.intersphinx',
47		'numpydoc',
	33	'sphinx.ext.mathjax',
	34	'sphinx.ext.napoleon',
48	35	]
49	36
50	37	# Add any paths that contain templates here, relative to this directory.

68	55	# built documents.
69	56	#
70	57	# The full version, including alpha/beta/rc tags.
71		release = current_version()
	58	release = '5.3.3'
72	59	# The short X.Y version.
73	60	version = '.'.join(release.split('.')[0:2])
74	61

84	71
85	72	# List of patterns, relative to source directory, that match files and
86	73	# directories to ignore when looking for source files.
87		exclude_patterns = ['solar/*']
	74	# exclude_patterns = ['solar/*']
88	75
89	76	# The reST default role (used for this markup: `text`) to use for all
90	77	# documents.

116	103
117	104	# The theme to use for HTML and HTML Help pages. See the documentation for
118	105	# a list of builtin themes.
119		html_theme = 'solar'
	106	on_rtd = os.environ.get('READTHEDOCS') == 'True'
	107	if on_rtd:
	108	html_theme = 'default'
	109	else:
	110	import sphinx_rtd_theme
	111	html_theme = 'sphinx_rtd_theme'
	112	# html_theme = 'solar'
120	113
121	114	# Theme options are theme-specific and customize the look and feel of a theme
122	115	# further. For a list of options available for each theme, see the

277	270
278	271
279	272	# Example configuration for intersphinx: refer to the Python standard library.
280		intersphinx_mapping = {'http://docs.python.org/': None}
	273	intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}

+100

-63

docs/source/examples.rst less more

8	8	If you want more detailed examples than given on this page, please see
9	9	https://github.com/SethMMorton/natsort/tree/master/test_natsort.
10	10
	11	.. contents::
	12	:local:
	13
11	14	Basic Usage
12	15	-----------
13	16
14	17	In the most basic use case, simply import :func:`~natsorted` and use
15		it as you would :func:`sorted`::
16
17		>>> a = ['a2', 'a9', 'a1', 'a4', 'a10']
	18	it as you would :func:`sorted`:
	19
	20	.. code-block:: python
	21
	22	>>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
18	23	>>> sorted(a)
19		['a1', 'a10', 'a2', 'a4', 'a9']
	24	['1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '2 ft 7 in', '7 ft 6 in']
20	25	>>> from natsort import natsorted, ns
21	26	>>> natsorted(a)
22		['a1', 'a2', 'a4', 'a9', 'a10']
	27	['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']
23	28
24	29	Sort Version Numbers
25	30	--------------------

34	39	++++++++++++++++++++++++++++++++++++++++++++++++
35	40
36	41	By default, if you wish to sort versions with a non-strict versioning
37		scheme, you may not get the results you expect::
	42	scheme, you may not get the results you expect:
	43
	44	.. code-block:: python
38	45
39	46	>>> a = ['1.2', '1.2rc1', '1.2beta2', '1.2beta1', '1.2alpha', '1.2.1', '1.1', '1.3']
40	47	>>> natsorted(a)
41	48	['1.1', '1.2', '1.2.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.3']
42	49
43	50	To make the '1.2' pre-releases come before '1.2.1', you need to use the following
44		recipe::
	51	recipe:
	52
	53	.. code-block:: python
45	54
46	55	>>> natsorted(a, key=lambda x: x.replace('.', '~'))
47	56	['1.1', '1.2', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2.1', '1.3']
48	57
49	58	If you also want '1.2' after all the alpha, beta, and rc candidates, you can
50		modify the above recipe::
	59	modify the above recipe:
	60
	61	.. code-block:: python
51	62
52	63	>>> natsorted(a, key=lambda x: x.replace('.', '~')+'z')
53	64	['1.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2', '1.2.1', '1.3']

55	66	Please see `this issue <https://github.com/SethMMorton/natsort/issues/13>`_ to
56	67	see why this works.
57	68
	69	.. _path_sort:
	70
58	71	Sort OS-Generated Paths
59	72	-----------------------
60	73
61	74	In some cases when sorting file paths with OS-Generated names, the default
62	75	:func:`~natsorted` algorithm may not be sufficient. In cases like these,
63		you may need to use the ``ns.PATH`` option::
	76	you may need to use the ``ns.PATH`` option:
	77
	78	.. code-block:: python
64	79
65	80	>>> a = ['./folder/file (1).txt',
66	81	... './folder/file.txt',

74	89	Locale-Aware Sorting (Human Sorting)
75	90	------------------------------------
76	91
	92	.. note::
	93	Please read :ref:`locale_issues` before using ``ns.LOCALE``, :func:`humansorted`,
	94	or :func:`index_humansorted`.
	95
77	96	You can instruct :mod:`natsort` to use locale-aware sorting with the
78	97	``ns.LOCALE`` option. In addition to making this understand non-ASCII
79	98	characters, it will also properly interpret non-'.' decimal separators
80	99	and also properly order case. It may be more convenient to just use
81		the :func:`humansorted` function::
	100	the :func:`humansorted` function:
	101
	102	.. code-block:: python
82	103
83	104	>>> from natsort import humansorted
84	105	>>> import locale

92	113
93	114	You may find that if you do not explicitly set the locale your results may not
94	115	be as you expect... I have found that it depends on the system you are on.
95		If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see below) then
	116	If you use `PyICU <https://pypi.org/project/PyICU>`_ (see below) then
96	117	you should not need to do this.
97	118
98		.. _bug_note:
99
100		Known Bugs When Using Locale-Aware Sorting On BSD-Based OSs (Including Mac OS X)
101		++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
102
103		If you find that ``ns.LOCALE`` (or :func:`~humansorted`) does not give
104		the results you expect, before filing a bug report please try to first install
105		`PyICU <https://pypi.python.org/pypi/PyICU>`_; this especially applies
106		to users on BSD-based systems (like Mac OS X). There are some known bugs
107		with the ``locale`` module from the standard library that are solved when
108		using `PyICU <https://pypi.python.org/pypi/PyICU>`_; you can read about
109		them here: http://bugs.python.org/issue23195.
110
111		If you have problems with ``ns.LOCALE`` (or :func:`~humansorted`),
112		especially on BSD-based systems, you can try the following:
113
114		1. Use "\.ISO8859-1" locale (i.e. 'en_US.ISO8859-1') rather than "\.UTF-8"
115		locale. These locales do not suffer from as many problems as "UTF-8"
116		and thus should give expected results.
117		2. Use `PyICU <https://pypi.python.org/pypi/PyICU>`_. If
118		`PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed, ``natsort``
119		will use it under the hood; this will give more
120		reliable cross-platform results in the long run. ``natsort`` will not
121		require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_
122		is installed at installation. Please visit
123		https://github.com/SethMMorton/natsort/issues/21 for more details and
124		how to install on Mac OS X. Please note that using
125		`PyICU <https://pypi.python.org/pypi/PyICU>`_ is the only way to
126		guarantee correct results for all input on BSD-based systems, since
127		every other suggestion is a workaround.
128		3. Do nothing. As of ``natsort`` version 4.0.0, ``natsort`` is configured
129		to compensate for a broken ``locale`` library in terms of case-handling;
130		if you do not need to be able to properly handle non-ASCII characters
131		then this may be the best option for you.
132
133		Note that the above solutions should not be required for Windows or
134		Linux since in Linux-based systems and Windows systems ``locale`` should work
135		just fine.
	119	.. _case_sort:
136	120
137	121	Controlling Case When Sorting
138	122	-----------------------------
139	123
140	124	For non-numbers, by default :mod:`natsort` uses ordinal sorting (i.e.
141		it sorts by the character's value in the ASCII table). For example::
	125	it sorts by the character's value in the ASCII table). For example:
	126
	127	.. code-block:: python
142	128
143	129	>>> a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
144	130	>>> natsorted(a)
145	131	['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn']
146	132
147	133	There are times when you wish to ignore the case when sorting,
148		you can easily do this with the ``ns.IGNORECASE`` option::
	134	you can easily do this with the ``ns.IGNORECASE`` option:
	135
	136	.. code-block:: python
149	137
150	138	>>> natsorted(a, alg=ns.IGNORECASE)
151	139	['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn']

156	144
157	145	Upper-case letters appear first in the ASCII table, but many natural
158	146	sorting methods place lower-case first. To do this, use
159		``ns.LOWERCASEFIRST``::
	147	``ns.LOWERCASEFIRST``:
	148
	149	.. code-block:: python
160	150
161	151	>>> natsorted(a, alg=ns.LOWERCASEFIRST)
162	152	['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn']

164	154	It may be undesirable to have the upper-case letters grouped together
165	155	and the lower-case letters grouped together; most would expect all
166	156	"a"s to be together regardless of case, and all "b"s, and so on. To
167		achieve this, use ``ns.GROUPLETTERS``::
	157	achieve this, use ``ns.GROUPLETTERS``:
	158
	159	.. code-block:: python
168	160
169	161	>>> natsorted(a, alg=ns.GROUPLETTERS)
170	162	['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn']
171	163
172	164	You might combine this with ``ns.LOWERCASEFIRST`` to get what most
173		would expect to be "natural" sorting::
	165	would expect to be "natural" sorting:
	166
	167	.. code-block:: python
174	168
175	169	>>> natsorted(a, alg=ns.G \| ns.LF)
176	170	['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn']

181	175	You can make :func:`~natsorted` search for any float that would be
182	176	a valid Python float literal, such as 5, 0.4, -4.78, +4.2E-34, etc.
183	177	using the ``ns.FLOAT`` key. You can disable the exponential component
184		of the number with ``ns.NOEXP``. ::
	178	of the number with ``ns.NOEXP``.
	179
	180	.. code-block:: python
185	181
186	182	>>> a = ['a50', 'a51.', 'a+50.4', 'a5.034e1', 'a+50.300']
187	183	>>> natsorted(a, alg=ns.FLOAT)

196	192	This can be easily accessed with the :func:`~realsorted` convenience
197	193	function. Please note that the behavior of the :func:`~realsorted` function
198	194	was the default behavior of :func:`~natsorted` for :mod:`natsort`
199		version < 4.0.0::
	195	version < 4.0.0:
	196
	197	.. code-block:: python
200	198
201	199	>>> natsorted(a, alg=ns.REAL)
202	200	['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.']

204	202	>>> realsorted(a)
205	203	['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.']
206	204
	205	.. _custom_sort:
	206
207	207	Using a Custom Sorting Key
208	208	--------------------------
209	209
210	210	Like the built-in ``sorted`` function, ``natsorted`` can accept a custom
211		sort key so that::
	211	sort key so that:
	212
	213	.. code-block:: python
212	214
213	215	>>> from operator import attrgetter, itemgetter
214	216	>>> a = [['a', 'num4'], ['b', 'num8'], ['c', 'num2']]

228	230
229	231	If you need to sort a list in-place, you cannot use :func:`~natsorted`; you
230	232	need to pass a key to the :meth:`list.sort` method. The function
231		:func:`~natsort_keygen` is a convenient way to generate these keys for you::
	233	:func:`~natsort_keygen` is a convenient way to generate these keys for you:
	234
	235	.. code-block:: python
232	236
233	237	>>> from natsort import natsort_keygen
234	238	>>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300']

240	244	:func:`~natsort_keygen` has the same API as :func:`~natsorted` (minus the
241	245	`reverse` option).
242	246
	247	Natural Sorting with ``cmp`` (Python 2 only)
	248	--------------------------------------------
	249
	250	.. note::
	251	This is a Python2-only feature! The :func:`natcmp` function is not
	252	exposed on Python3. Because this documentation is built with
	253	Python3, you will not find :func:`natcmp` in the API.
	254
	255	If you are using a legacy codebase that requires you to use :func:`cmp` instead
	256	of a key-function, you can use :func:`~natcmp`.
	257
	258	.. code-block:: python
	259
	260	>>> import sys
	261	>>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
	262	>>> if sys.version_info[0] == 2:
	263	... from natsort import natcmp
	264	... sorted(a, cmp=natcmp)
	265	... else:
	266	... natsorted(a) # so docstrings don't fail
	267	['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']
	268
	269	:func:`natcmp` also accepts an ``alg`` argument so you can customize your
	270	sorting experience.
	271
243	272	Sorting Multiple Lists According to a Single List
244	273	-------------------------------------------------
245	274

247	276	lists and reorder the other lists according to how the first was sorted.
248	277	To achieve this you could use the :func:`~index_natsorted` in combination
249	278	with the convenience function
250		:func:`~order_by_index`::
	279	:func:`~order_by_index`:
	280
	281	.. code-block:: python
251	282
252	283	>>> from natsort import index_natsorted, order_by_index
253	284	>>> a = ['a2', 'a9', 'a1', 'a4', 'a10']

265	296	----------------------------------
266	297
267	298	Just like the :func:`sorted` built-in function, you can supply the
268		``reverse`` option to return the results in reverse order::
	299	``reverse`` option to return the results in reverse order:
	300
	301	.. code-block:: python
269	302
270	303	>>> a = ['a2', 'a9', 'a1', 'a4', 'a10']
271	304	>>> natsorted(a, reverse=True)

286	319	these functions know not to raise an error if the input is not a byte
287	320	array, so you can use the key on any arbitrary collection of data.
288	321
289		::
	322	.. code-block:: python
290	323
291	324	>>> from natsort import as_ascii
292	325	>>> a = [b'a', 14.0, 'b']

299	332	so that :mod:`natsort` cannot parse them for numbers. As a result, if you
300	333	run :mod:`natsort` on a list of bytes, you will get results that are like
301	334	Python's default sorting behavior. Of course, you can use the decoding
302		functions to solve this::
	335	functions to solve this:
	336
	337	.. code-block:: python
303	338
304	339	>>> from natsort import as_utf8
305	340	>>> a = [b'a56', b'a5', b'a6', b'a40']

309	344	True
310	345
311	346	If you need a codec different from ASCII or UTF-8, you can use
312		:func:`decoder` to generate a custom key::
	347	:func:`decoder` to generate a custom key:
	348
	349	.. code-block:: python
313	350
314	351	>>> from natsort import decoder
315	352	>>> a = [b'a56', b'a5', b'a6', b'a40']

+1135

-0

docs/source/howitworks.rst less more

	0	.. default-domain:: py
	1	.. currentmodule:: natsort
	2
	3	.. _howitworks:
	4
	5	How Does Natsort Work?
	6	======================
	7
	8	.. contents::
	9	:local:
	10
	11	:mod:`natsort` works by breaking strings into smaller sub-components (numbers
	12	or everything else), and returning these components in a tuple. Sorting
	13	tuples in Python is well-defined, and this fact is used to sort the input
	14	strings properly. But how does one break a string into sub-components?
	15	And what does one do to those components once they are split? Below I
	16	will explain the algorithm that was chosen for the :mod:`natsort` module,
	17	and some of the thinking that went into those design decisions. I will
	18	also mention some of the stumbling blocks I ran into because
	19	`getting sorting right is surprisingly hard`_.
	20
	21	If you are impatient, you can skip to :ref:`tldr1` for the algorithm
	22	in the simplest case, and :ref:`tldr2`
	23	to see what extra code is needed to handle special cases.
	24
	25	First, How Does Natural Sorting Work At a High Level?
	26	-----------------------------------------------------
	27
	28	If I want to compare '2 ft 7 in' to '2 ft 11 in', I might do the following:
	29
	30	.. code-block:: python
	31
	32	>>> '2 ft 7 in' < '2 ft 11 in'
	33	False
	34
	35	We as humans know that the above should be true, but why does Python think it
	36	is false? Here is how it is performing the comparison::
	37
	38	'2' <=> '2' ==> equal, so keep going
	39	' ' <=> ' ' ==> equal, so keep going
	40	'f' <=> 'f' ==> equal, so keep going
	41	't' <=> 't' ==> equal, so keep going
	42	' ' <=> ' ' ==> equal, so keep going
	43	'7' <=> '1' ==> different, use result of '7' < '1'
	44
	45	'7' evaluates as greater than '1' so the statement is false. When sorting, if
	46	a value is less than another it is placed first, so in our above example
	47	'2 ft 11 in' would end up before '2 ft 7 in', which is not correct. What to do?
	48
	49	The best way to handle this is to break the string into sub-components
	50	of numbers and non-numbers, and then convert the numeric parts into
	51	:func:`float` or :func:`int` types. This will force Python to
	52	actually understand the context of what it is sorting and then "do the
	53	right thing." Luckily, it handles sorting lists of strings right out-of-the-box,
	54	so the only hard part is actually making this string-to-list transformation
	55	and then Python will handle the rest.
	56
	57	::
	58
	59	'2 ft 7 in' ==> (2, ' ft ', 7, ' in')
	60	'2 ft 11 in' ==> (2, ' ft ', 11, ' in')
	61
	62	When Python compares the two, it roughly follows the below logic::
	63
	64	2 <=> 2 ==> equal, so keep going
	65	' ft ' <=> ' ft ' ==> a string is a special type of sequence - evaluate each character individually
	66	\|\|
	67	-->
	68	' ' <=> ' ' ==> equal, so keep going
	69	'f' <=> 'f' ==> equal, so keep going
	70	't' <=> 't' ==> equal, so keep going
	71	' ' <=> ' ' ==> equal, so keep going
	72	<== Back to parent sequence
	73	7 <=> 11 ==> different, use the result of 7 < 11
	74
	75	Clearly, seven is less than eleven, so our comparison is as we expect, and we
	76	would get the sorting order we wanted.
	77
	78	At its heart, :mod:`natsort` is simply a tool to break strings into tuples,
	79	turning numbers in strings (i.e. ``'79'``) into ints and floats as it does this.
	80
	81	Natsort's Approach
	82	------------------
	83
	84	.. contents::
	85	:local:
	86
	87	Decomposing Strings Into Sub-Components
	88	+++++++++++++++++++++++++++++++++++++++
	89
	90	The first major hurdle to overcome is to decompose the string into sub-components.
	91	Remarkably, this turns out to be the easy part, owing mostly to Python's easy access
	92	to regular expressions. Breaking an arbitrary string based on a pattern is pretty
	93	straightforward.
	94
	95	.. code-block:: python
	96
	97	>>> import re
	98	>>> re.split(r'(\d+)', '2 ft 11 in')
	99	['', '2', ' ft ', '11', ' in']
	100
	101	Clear (assuming you can read regular expressions) and concise.
	102
	103	The reason I began developing :mod:`natsort` in the first place was because I
	104	needed to handle the natural sorting of strings containing real numbers, not just
	105	unsigned integers as the above example contains. By real numbers, I mean those like
	106	``-45.4920E-23``. :mod:`natsort` can handle just about any number definition;
	107	to that end, here are all the regular expressions used in :mod:`natsort`:
	108
	109	.. code-block:: python
	110
	111	>>> unsigned_int = r'([0-9]+)'
	112	>>> signed_int = r'([-+]?[0-9]+)'
	113	>>> unsigned_float = r'((?:[0-9]+\.?[0-9]*\|\.[0-9]+)(?:[eE][-+]?[0-9]+)?)'
	114	>>> signed_float = r'([-+]?(?:[0-9]+\.?[0-9]*\|\.[0-9]+)(?:[eE][-+]?[0-9]+)?)'
	115	>>> unsigned_float_no_exponent = r'((?:[0-9]+\.?[0-9]*\|\.[0-9]+))'
	116	>>> signed_float_no_exponent = r'([-+]?(?:[0-9]+\.?[0-9]*\|\.[0-9]+))'
	117
	118	Note that ``"inf"`` and ``"nan"`` are deliberately omitted from the float definition because you
	119	wouldn't want (for example) ``"banana"`` to be converted into ``['ba', 'nan', 'a']``.
	120	Let's see an example:
	121
	122	.. code-block:: python
	123
	124	>>> re.split(signed_float, 'The mass of 3 electrons is 2.732815068E-30 kg')
	125	['The mass of ', '3', ' electrons is ', '2.732815068E-30', ' kg']
	126
	127	.. note::
	128
	129	It is a bit of a lie to say the above are the complete regular expressions. In the
	130	actual code there is also handling for non-ASCII unicode characters (such as ⑦),
	131	but I will ignore that aspect of :mod:`natsort` in this discussion.
	132
	133	Now, when the user wants to change the definition of a number, it is as easy as changing
	134	the pattern supplied to the regular expression engine.
	135
	136	Choosing the right default is hard, though (well, in this case it shouldn't have been
	137	but I was rather thick-headed).
	138	In retrospect, it should have been obvious that since essentially all the code examples
	139	I had/have seen for natural sorting were for unsigned integers, I should have made the default
	140	definition of a number an unsigned integer. But, in the brash days of my youth I assumed
	141	that since my use case was real numbers, everyone else would be happier sorting by real numbers;
	142	so, I made the default definition of a number a signed float with exponent.
	143	`This astonished`_ `a lot`_ `of people`_
	144	(`and some people aren't very nice when they are astonished`_).
	145	Starting with :mod:`natsort` version 4.0.0 the default number definition was
	146	changed to an unsigned integer which satisfies the "least astonishment" principle, and
	147	I have not heard a complaint since.
	148
	149	.. admonition:: Wouldn't itertools.groupby work as well as regex to split strings?
	150
	151	You could do it using something like :func:`itertools.groupby`, but it is not clearer
	152	nor more concise, I promise.
	153
	154	.. code-block:: python
	155
	156	>>> import itertools
	157	>>> import operator
	158	>>> list(map(''.join, map(operator.itemgetter(1), itertools.groupby('2 ft 11 in', str.isdigit))))
	159	['2', ' ft ', '11', ' in']
	160
	161	OK, but let's assume for a moment that you really like itertools and think the above
	162	is fine. We still have lost a lot of flexibility here because of the :meth:`str.isdigit`
	163	call which makes this method non-optimal; with a regular expression one can change
	164	the pattern string and split on much more complicated patterns, but with
	165	:func:`itertools.groupby` it becomes much more complicated to change it up;
	166	I implemented this strategy `as part of my testing`_ and it is anything but clear and concise.
	167
	168	Not to mention it's way slower than regex. Just the simple example above (unsigned integers)
	169	is 50% slower than regex...
	170
	171	Coercing Strings Containing Numbers Into Numbers
	172	++++++++++++++++++++++++++++++++++++++++++++++++
	173
	174	There has been some debate on Stack Overflow as to what method is best to
	175	coerce a string to a number if it can be coerced, and leaving it alone otherwise
	176	(see `this one for coercion`_ and `this one for checking`_ for some high traffic questions),
	177	but it mostly boils down to two different solutions, shown here:
	178
	179	.. code-block:: python
	180
	181	>>> def coerce_try_except(x):
	182	... try:
	183	... return int(x)
	184	... except ValueError:
	185	... return x
	186	...
	187	>>> def coerce_regex(x):
	188	... # Note that precompiling the regex is more performant,
	189	... # but I do not show that here for clarity's sake.
	190	... return int(x) if re.match(r'[-+]?\d+$', x) else x
	191	...
	192
	193	Here are some timing results run on my machine:
	194
	195	::
	196
	197	In [0]: numbers = list(map(str, range(100))) # A list of numbers as strings
	198
	199	In [1]: not_numbers = ['banana' + x for x in numbers]
	200
	201	In [2]: %timeit [coerce_try_except(x) for x in numbers]
	202	10000 loops, best of 3: 51.1 µs per loop
	203
	204	In [3]: %timeit [coerce_try_except(x) for x in not_numbers]
	205	1000 loops, best of 3: 289 µs per loop
	206
	207	In [4]: %timeit [coerce_regex(x) for x in not_numbers]
	208	10000 loops, best of 3: 67.6 µs per loop
	209
	210	In [5]: %timeit [coerce_regex(x) for x in numbers]
	211	10000 loops, best of 3: 123 µs per loop
	212
	213	What can we learn from this? The ``try: except`` method (arguably the most "pythonic"
	214	of the solutions) is best for numeric input, but performs over 5X slower for non-numeric
	215	input. Conversely, the regular expression method, though slower than the best case of
	216	``try: except`` for both input types, is more efficient for non-numeric input than for
	217	input that can be converted to an ``int``. Further, even though the regular expression
	218	method is slower than that best case for both input types, it is always at least twice
	219	as fast as the worst case for the ``try: except``.
	220
	221	Why do I care? Shouldn't I just pick a method and not worry about it? Probably. However,
	222	I am very conscious about the performance of :mod:`natsort`, and want it to be a true
	223	drop-in replacement for :func:`sorted` without having to incur a performance penalty.
	224	For the purposes of :mod:`natsort`, there is no clear winner between the two algorithms -
	225	the data being passed to this function will likely be a mix of numeric and non-numeric
	226	string content. Do I use the ``try: except`` method and hope the speed gains on
	227	numbers will offset the non-number performance, or do I use regular expressions and
	228	take the more stable performance?
	229
	230	It turns out that within the context of :mod:`natsort`, some assumptions can be
	231	made that make a hybrid approach attractive. Because all strings are pre-split
	232	into numeric and non-numeric content before being passed to this coercion function,
	233	the assumption can be made that *if a string begins with a digit or a sign, it
	234	can be coerced into a number*.
	235
	236	.. code-block:: python
	237
	238	>>> def coerce_to_int(x):
	239	... if x[0] in '0123456789+-':
	240	... try:
	241	... return int(x)
	242	... except ValueError:
	243	... return x
	244	... else:
	245	... return x
	246	...
	247
	248	So how does this perform compared to the standard coercion methods?
	249
	250	::
	251
	252	In [6]: %timeit [coerce_to_int(x) for x in numbers]
	253	10000 loops, best of 3: 71.6 µs per loop
	254
	255	In [7]: %timeit [coerce_to_int(x) for x in not_numbers]
	256	10000 loops, best of 3: 26.4 µs per loop
	257
	258	The hybrid method eliminates most of the time wasted on numbers checking that it
	259	is in fact a number before passing to :func:`int`, and eliminates the time wasted
	260	in the exception stack for input that is not a number.
	261
	262	That's as fast as we can get, right? In pure Python, probably. At least, it's
	263	close. But because I am crazy and a glutton for punishment, I decided to see
	264	if I could get any faster writing a C extension. It's called
	265	`fastnumbers`_ and contains a C implementation of the above coercion functions
	266	called :func:`fast_int`. How does it fare? Pretty well.
	267
	268	::
	269
	270	In [8]: %timeit [fast_int(x) for x in numbers]
	271	10000 loops, best of 3: 30.9 µs per loop
	272
	273	In [9]: %timeit [fast_int(x) for x in not_numbers]
	274	10000 loops, best of 3: 30 µs per loop
	275
	276	During development of :mod:`natsort`, I wanted to ensure that using it did not
	277	get in the way of a user's program by introducing a performance penalty to their code.
	278	To that end, I do not feel like my adventures down the rabbit hole of optimization
	279	of coercion functions were a waste; I can confidently look users in the eye and
	280	say I considered every option in ensuring :mod:`natsort` is as efficient as possible.
	281	This is why if `fastnumbers`_ is installed it will be used for this step,
	282	and otherwise the hybrid method will be used.
	283
	284	.. note::
	285
	286	Modifying the hybrid coercion function for floats is straightforward.
	287
	288	.. code-block:: python
	289
	290	>>> def coerce_to_float(x):
	291	... if x[0] in '.0123456789+-' or x.lower().lstrip()[:3] in ('nan', 'inf'):
	292	... try:
	293	... return float(x)
	294	... except ValueError:
	295	... return x
	296	... else:
	297	... return x
	298	...
	299
	300	.. _tldr1:
	301
	302	TL;DR 1 - The Simple "No Special Cases" Algorithm
	303	+++++++++++++++++++++++++++++++++++++++++++++++++
	304
	305	At this point, our :mod:`natsort` algorithm is essentially the following:
	306
	307	.. code-block:: python
	308
	309	>>> import re
	310	>>> def natsort_key(x, as_float=False, signed=False):
	311	... if as_float:
	312	... regex = signed_float if signed else unsigned_float
	313	... else:
	314	... regex = signed_int if signed else unsigned_int
	315	... split_input = re.split(regex, x)
	316	... split_input = filter(None, split_input) # removes null strings
	317	... coerce = coerce_to_float if as_float else coerce_to_int
	318	... return tuple(coerce(s) for s in split_input)
	319	...
	320
	321	I have written the above for clarity and not performance.
	322	This pretty much matches `most natural sort solutions for python on Stack Overflow`_
	323	(except the above includes customization of the definition of a number).
	324
	325	Special Cases Everywhere!
	326	-------------------------
	327
	328	.. contents::
	329	:local:
	330
	331	.. image:: special_cases_everywhere.jpg
	332
	333	If what I described in :ref:`TL;DR 1 <tldr1>` were
	334	all that :mod:`natsort` needed to
	335	do then there probably wouldn't be much need for a third-party module, right?
	336	Probably. But it turns out that in real-world data there are a lot of
	337	special cases that need to be handled, and in true `80%/20%`_ fashion, the
	338	majority of the code in :mod:`natsort` is devoted to handling special cases
	339	like those described below.
	340
	341	Sorting Filesystem Paths
	342	++++++++++++++++++++++++
	343
	344	`The first major special case I encountered was sorting filesystem paths`_
	345	(if you go to the link, you will see I didn't handle it well for a year...
	346	this was before I fully realized how much functionality I could really add
	347	to :mod:`natsort`). Let's apply the :func:`natsort_key` from above to some
	348	filesystem paths that you might see being auto-generated from your operating
	349	system:
	350
	351	.. code-block:: python
	352
	353	>>> paths = ['/p/Folder (10)/file.tar.gz',
	354	... '/p/Folder/file.tar.gz',
	355	... '/p/Folder (1)/file (1).tar.gz',
	356	... '/p/Folder (1)/file.tar.gz']
	357	>>> sorted(paths, key=natsort_key)
	358	['/p/Folder (1)/file (1).tar.gz', '/p/Folder (1)/file.tar.gz', '/p/Folder (10)/file.tar.gz', '/p/Folder/file.tar.gz']
	359
	360	Well that's not right! What is ``'/p/Folder/file.tar.gz'`` doing at the end?
	361	It has to do with the numerical ASCII code assigned to the space and
	362	``/`` characters in the `ASCII table`_. According to the `ASCII table`_, the
	363	space character (number 32) comes before the ``/`` character (number 47). If
	364	we remove the common prefix in all of the above strings (``'/p/Folder'``), we
	365	can see why this happens:
	366
	367	.. code-block:: python
	368
	369	>>> ' (1)/file.tar.gz' < '/file.tar.gz'
	370	True
	371	>>> ' ' < '/'
	372	True
	373
	374	This isn't very convenient... how do we solve it? We can split the path
	375	across the path separators and then sort. A convenient way to do this is
	376	with the `Path.parts`_ method from :mod:`pathlib`:
	377
	378	.. code-block:: python
	379
	380	>>> import pathlib
	381	>>> sorted(paths, key=lambda x: tuple(natsort_key(s) for s in pathlib.Path(x).parts))
	382	['/p/Folder/file.tar.gz', '/p/Folder (1)/file (1).tar.gz', '/p/Folder (1)/file.tar.gz', '/p/Folder (10)/file.tar.gz']
	383
	384	Almost! It seems like there is some funny business going on in the final
	385	filename component as well. We can solve that nicely and quickly with `Path.suffixes`_
	386	and `Path.stem`_.
	387
	388	.. code-block:: python
	389
	390	>>> def decompose_path_into_components(x):
	391	... path_split = list(pathlib.Path(x).parts)
	392	... # Remove the final filename component from the path.
	393	... final_component = pathlib.Path(path_split.pop())
	394	... # Split off all the extensions.
	395	... suffixes = final_component.suffixes
	396	... stem = final_component.name.replace(''.join(suffixes), '')
	397	... # Remove the '.' prefix of each extension, and make that
	398	... # final component a list of the stem and each suffix.
	399	... final_component = [stem] + [x[1:] for x in suffixes]
	400	... # Replace the split final filename component.
	401	... path_split.extend(final_component)
	402	... return path_split
	403	...
	404	>>> def natsort_key_with_path_support(x):
	405	... return tuple(natsort_key(s) for s in decompose_path_into_components(x))
	406	...
	407	>>> sorted(paths, key=natsort_key_with_path_support)
	408	['/p/Folder/file.tar.gz', '/p/Folder (1)/file.tar.gz', '/p/Folder (1)/file (1).tar.gz', '/p/Folder (10)/file.tar.gz']
	409
	410	This works because in addition to breaking the input by path separators, the final
	411	filename component is separated from its extensions as well [#f1]_. Then, each of these
	412	separated components is sent to the :mod:`natsort` algorithm, so the result is
	413	a tuple of tuples. Once that is done, we can see how comparisons can be done in
	414	the expected manner.
	415
	416	.. code-block:: python
	417
	418	>>> a = natsort_key_with_path_support('/p/Folder (1)/file (1).tar.gz')
	419	>>> a
	420	(('/',), ('p',), ('Folder (', 1, ')'), ('file (', 1, ')'), ('tar',), ('gz',))
	421	>>>
	422	>>> b = natsort_key_with_path_support('/p/Folder/file.tar.gz')
	423	>>> b
	424	(('/',), ('p',), ('Folder',), ('file',), ('tar',), ('gz',))
	425	>>>
	426	>>> a > b
	427	True
	428
	429	Comparing Different Types on Python 3
	430	+++++++++++++++++++++++++++++++++++++
	431
	432	`The second major special case I encountered was sorting of different types`_.
	433	If you are on Python 2 (i.e. legacy Python), this mostly doesn't matter too
	434	much since it uses an arbitrary heuristic to allow traditionally un-comparable
	435	types to be compared (such as comparing ``'a'`` to ``1``). However, on Python 3
	436	(i.e. Python) it simply won't let you perform such nonsense, raising a
	437	:exc:`TypeError` instead.
	438
	439	You can imagine that a module that breaks strings into tuples of numbers and
	440	strings is walking a dangerous line if it does not have special handling for
	441	comparing numbers and strings. My imagination was not so great at first.
	442	Let's take a look at all the ways this can fail with real-world data.
	443
	444	.. code-block:: python
	445
	446	>>> def natsort_key_with_poor_real_number_support(x):
	447	... split_input = re.split(signed_float, x)
	448	... split_input = filter(None, split_input) # removes null strings
	449	... return tuple(coerce_to_float(s) for s in split_input)
	450	>>>
	451	>>> sorted([5, '4'], key=natsort_key_with_poor_real_number_support)
	452	Traceback (most recent call last):
	453	...
	454	TypeError: ...
	455	>>>
	456	>>> sorted(['12 apples', 'apples'], key=natsort_key_with_poor_real_number_support)
	457	Traceback (most recent call last):
	458	...
	459	TypeError: ...
	460	>>>
	461	>>> sorted(['version5.3.0', 'version5.3rc1'], key=natsort_key_with_poor_real_number_support)
	462	Traceback (most recent call last):
	463	...
	464	TypeError: ...
	465
	466	Let's break these down.
	467
	468	#. The integer ``5`` is sent to ``re.split`` which expects only strings
	469	or bytes, which is a no-no.
	470	#. ``natsort_key_with_poor_real_number_support('12 apples') < natsort_key_with_poor_real_number_support('apples')``
	471	is the same as ``(12.0, ' apples') < ('apples',)``, and thus a number gets
	472	compared to a string [#f2]_ which also is a no-no.
	473	#. This one scores big on the astonishment scale, especially if one accidentally
	474	uses signed integers or real numbers when they mean to use unsigned integers.
	475	``natsort_key_with_poor_real_number_support('version5.3.0') < natsort_key_with_poor_real_number_support('version5.3rc1')``
	476	is the same as ``('version', 5.3, 0.0) < ('version', 5.3, 'rc', 1.0)``, so in the
	477	third element a number gets compared to a string, once again the same
	478	old no-no. (The same would happen with ``'version5-3'`` and ``'version5-a'``,
	479	which would become ``('version', 5, -3)`` and ``('version', 5, '-a')``).
	480
	481	As you might expect, the solution to the first issue is to wrap the ``re.split``
	482	call in a ``try: except:`` block and handle the number specially if a
	483	:exc:`TypeError` is raised. The second and third cases could be handled
	484	in a "special case" manner, meaning only respond and do something different
	485	if these problems are detected. But a less error-prone method is to ensure
	486	that the data is correct-by-construction, and this can be done by ensuring
	487	that the returned tuples always start with a string, and then alternate
	488	in a string-number-string-number-string pattern; this can be achieved by
	489	adding an empty string wherever the pattern is not followed [#f3]_. This ends
	490	up working out pretty nicely because empty strings are always "less" than
	491	any non-empty string, and we typically want numbers to come before strings.
	492
	493	Let's take a look at how this works out.
	494
	495	.. code-block:: python
	496
	497	>>> from natsort.utils import _sep_inserter
	498	>>> list(_sep_inserter(iter(['apples']), ''))
	499	['apples']
	500	>>>
	501	>>> list(_sep_inserter(iter([12, ' apples']), ''))
	502	['', 12, ' apples']
	503	>>>
	504	>>> list(_sep_inserter(iter(['version', 5, -3]), ''))
	505	['version', 5, '', -3]
	506	>>>
	507	>>> from natsort import natsort_keygen, ns
	508	>>> natsort_key_with_good_real_number_support = natsort_keygen(alg=ns.REAL)
	509	>>>
	510	>>> sorted([5, '4'], key=natsort_key_with_good_real_number_support)
	511	['4', 5]
	512	>>>
	513	>>> sorted(['12 apples', 'apples'], key=natsort_key_with_good_real_number_support)
	514	['12 apples', 'apples']
	515	>>>
	516	>>> sorted(['version5.3.0', 'version5.3rc1'], key=natsort_key_with_good_real_number_support)
	517	['version5.3.0', 'version5.3rc1']
	518
	519	How the "good" version works will be given in `TL;DR 2 - Handling Crappy, Real-World Input`_.
	520
	521	Handling NaN
	522	++++++++++++
	523
	524	`A rather unexpected special case I encountered was sorting collections containing NaN`_.
	525	Let's see what happens when you try to sort a plain old list of numbers when there
	526	is a NaN floating around in there.
	527
	528	.. code-block:: python
	529
	530	>>> danger = [7, float('nan'), 22.7, 19, -14, 59.123, 4]
	531	>>> sorted(danger)
	532	[7, nan, -14, 4, 19, 22.7, 59.123]
	533
	534	Clearly that isn't correct, and for once it isn't my fault!
	535	`It's hard to compare floating point numbers`_. By definition, NaN is unorderable
	536	to any other number, and is never equal to any other number, including itself.
	537
	538	.. code-block:: python
	539
	540	>>> nan = float('nan')
	541	>>> 5 > nan
	542	False
	543	>>> 5 < nan
	544	False
	545	>>> 5 == nan
	546	False
	547	>>> 5 != nan
	548	True
	549	>>> nan == nan
	550	False
	551	>>> nan != nan
	552	True
	553
	554	The implication of all this for us is that if there is an NaN in the
	555	data-set we are trying to sort, the data-set will end up being sorted in
	556	two separate yet individually sorted sequences - the one before the NaN,
	557	and the one after. This is because the ``<`` operation that is used
	558	to sort always returns :const:`False` with NaN.
	559
	560	Because :mod:`natsort` aims to sort sequences in a way that does not surprise
	561	the user, keeping this behavior is not acceptable (I don't require my users
	562	to know how NaN will behave in a sorting algorithm). The simplest way to
	563	satisfy the "least astonishment" principle is to substitute NaN with
	564	some other value. But what value is least astonishing? I chose to replace
	565	NaN with :math:`-\infty` so that these poorly behaved elements always
	566	end up at the front where the users will most likely be alerted to their presence.
	567
	568	.. code-block:: python
	569
	570	>>> def fix_nan(x):
	571	... if x != x: # only true for NaN
	572	... return float('-inf')
	573	... else:
	574	... return x
	575	...
	576
	577	Let's check out :ref:`TL;DR 2 <tldr2>` to see how this can be
	578	incorporated into the simple key function from :ref:`TL;DR 1 <tldr1>`.
	579
	580	.. _tldr2:
	581
	582	TL;DR 2 - Handling Crappy, Real-World Input
	583	+++++++++++++++++++++++++++++++++++++++++++
	584
	585	Let's see how our elegant key function from :ref:`TL;DR 1 <tldr1>` has
	586	become bastardized in order to support handling mixed real-world data
	587	and user customizations.
	588
	589	>>> def natsort_key(x, as_float=False, signed=False, as_path=False):
	590	... if as_float:
	591	... regex = signed_float if signed else unsigned_float
	592	... else:
	593	... regex = signed_int if signed else unsigned_int
	594	... try:
	595	... if as_path:
	596	... x = decompose_path_into_components(x) # Decomposes into list of strings
	597	... # If this raises a TypeError, input is not a string.
	598	... split_input = re.split(regex, x)
	599	... except TypeError:
	600	... try:
	601	... # Does this need to be applied recursively (list-of-list)?
	602	... return tuple(map(natsort_key, x))
	603	... except TypeError:
	604	... # Must be a number
	605	... ret = ('', fix_nan(x)) # Maintain string-number-string pattern
	606	... return (ret,) if as_path else ret # as_path returns tuple-of-tuples
	607	... else:
	608	... split_input = filter(None, split_input) # removes null strings
	609	... # Note that the coerce_to_int/coerce_to_float functions
	610	... # are also modified to use the fix_nan function.
	611	... if as_float:
	612	... coerced_input = (coerce_to_float(s) for s in split_input)
	613	... else:
	614	... coerced_input = (coerce_to_int(s) for s in split_input)
	615	... return tuple(_sep_inserter(coerced_input, ''))
	616	...
	617
	618	And this doesn't even show handling :class:`bytes` type! Notice that we have
	619	to do non-obvious things like modify the return form of numbers when ``as_path``
	620	is given, just to avoid comparing strings and numbers for the case in which a user provides
	621	input like ``['/home/me', 42]``.
	622
	623	Let's take it out for a spin!
	624
	625	.. code-block:: python
	626
	627	>>> danger = [7, float('nan'), 22.7, '19', '-14', '59.123', 4]
	628	>>> sorted(danger, key=lambda x: natsort_key(x, as_float=True, signed=True))
	629	[nan, '-14', 4, 7, '19', 22.7, '59.123']
	630	>>>
	631	>>> paths = ['/p/Folder (1)/file.tar.gz',
	632	... '/p/Folder/file.tar.gz',
	633	... 123456]
	634	>>> sorted(paths, key=lambda x: natsort_key(x, as_path=True))
	635	[123456, '/p/Folder/file.tar.gz', '/p/Folder (1)/file.tar.gz']
	636
	637	Here Be Dragons: Adding Locale Support
	638	--------------------------------------
	639
	640	.. contents::
	641	:local:
	642
	643	Probably the most challenging special case I had to handle was getting
	644	:mod:`natsort` to handle sorting the non-numerical parts of input
	645	correctly, and also allowing it to sort the numerical bits in different
	646	locales. This was in no way what I originally set out to do with this
	647	library, so I was `caught a bit off guard when the request was initially made`_.
	648	I discovered the :mod:`locale` library, and assumed that if it's part of Python's
	649	StdLib there can't be too many dragons, right?
	650
	651	.. admonition:: INCOMPLETE LIST OF DRAGONS
	652
	653	- https://github.com/SethMMorton/natsort/issues/21
	654	- https://github.com/SethMMorton/natsort/issues/22
	655	- https://github.com/SethMMorton/natsort/issues/23
	656	- https://github.com/SethMMorton/natsort/issues/36
	657	- https://github.com/SethMMorton/natsort/issues/44
	658	- https://bugs.python.org/issue2481
	659	- https://bugs.python.org/issue23195
	660	- https://stackoverflow.com/questions/3412933/python-not-sorting-unicode-properly-strcoll-doesnt-help
	661	- https://stackoverflow.com/questions/22203550/sort-dictionary-by-key-using-locale-collation
	662	- https://stackoverflow.com/questions/33459384/unicode-character-not-in-range-when-calling-locale-strxfrm
	663	- https://stackoverflow.com/questions/36431810/sort-numeric-lines-with-thousand-separators
	664	- https://stackoverflow.com/questions/45734562/how-can-i-get-a-reasonable-string-sorting-with-python
	665
	666	These can be summed up as follows:
	667
	668	#. :mod:`locale` is a thin wrapper over your operating system's locale
	669	library, so if that is broken (like it is on BSD and OSX) then
	670	:mod:`locale` is broken in Python.
	671	#. Because of a bug in legacy Python (i.e. Python 2), there is no uniform way to use
	672	the :mod:`locale` sorting functionality between legacy Python and Python 3.
	673	#. People have differing opinions of how capitalization should affect word order.
	674	#. There is no built-in way to handle locale-dependent thousands separators
	675	and decimal points robustly.
	676	#. Proper handling of Unicode is complicated.
	677	#. Proper handling of :mod:`locale` is complicated.
	678
	679	Easily over half of the code in :mod:`natsort` is in some way dealing with some
	680	aspect of :mod:`locale` or basic case handling. It would have been
	681	impossible to get right without a `really good`_ `testing strategy`_.
	682
	683	Don't expect any more TL;DR's... if you want to see how all this is fully
	684	incorporated into the :mod:`natsort` algorithm then please take a look
	685	`at the code`_. However, I will hint at how specific steps are taken in
	686	each section.
	687
	688	Let's see how we can handle some of the dragons, one-by-one.
	689
	690	Basic Case Control Support
	691	++++++++++++++++++++++++++
	692
	693	Without even thinking about the mess that is adding :mod:`locale` support,
	694	:mod:`natsort` can introduce support for controlling how case is interpreted.
	695
	696	First, let's take a look at how it is sorted by default (due to
	697	where characters lie on the `ASCII table`_).
	698
	699	.. code-block:: python
	700
	701	>>> a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
	702	>>> sorted(a)
	703	['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn']
	704
	705	All uppercase letters come before lowercase letters in the `ASCII table`_,
	706	so all capitalized words appear first. Not everyone agrees that this
	707	is the correct order. Some believe that the capitalized words should
	708	be last (``['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn']``).
	709	Some believe that both the lowercase and uppercase versions
	710	should appear together (``['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn']``).
	711	Some believe that both should be true ☹. Some people don't care at all [#f4]_.
	712
	713	Solving the first case (I call it LOWERCASEFIRST) is actually pretty
	714	easy... just call the :meth:`str.swapcase` method on the input.
	715
	716	.. code-block:: python
	717
	718	>>> sorted(a, key=lambda x: x.swapcase())
	719	['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn']
	720
	721	The last (I call it IGNORECASE) should be super easy, right?
	722	Simply call :meth:`str.lower` on the input. This will work but may
	723	not always give the correct answer on non-latin character sets. It's
	724	a good thing that in Python 3.3
	725	:meth:`str.casefold` was introduced, which does a better job of removing
	726	all case information from unicode characters in
	727	non-latin alphabets.
	728
	729	.. code-block:: python
	730
	731	>>> def remove_case(x):
	732	... try:
	733	... return x.casefold()
	734	... except AttributeError: # Legacy Python backwards compatibility
	735	... return x.lowercase()
	736	...
	737	>>> sorted(a, key=remove_case)
	738	['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn']
	739
	740	The middle case (I call it GROUPLETTERS) is less straightforward.
	741	The most efficient way to handle this is to duplicate each character
	742	with its lowercase version and then the original character.
	743
	744	.. code-block:: python
	745
	746	>>> import itertools
	747	>>> def groupletters(x):
	748	... return ''.join(itertools.chain.from_iterable((remove_case(y), y) for y in x))
	749	...
	750	>>> groupletters('Apple')
	751	'aAppppllee'
	752	>>> groupletters('apple')
	753	'aappppllee'
	754	>>> sorted(a, key=groupletters)
	755	['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn']
	756
	757	The effect of this is that both ``'Apple'`` and ``'apple'`` are
	758	placed adjacent to each other because their transformations both begin
	759	with ``'a'``, and then the second character can be used to order them
	760	appropriately with respect to each other.
	761
	762	There's a problem with this, though. Within the context of :mod:`natsort`
	763	we are trying to correctly sort numbers and those should be left alone.
	764
	765	.. code-block:: python
	766
	767	>>> a = ['Apple5', 'apple', 'Apple4E10', 'Banana']
	768	>>> sorted(a, key=lambda x: natsort_key(x, as_float=True))
	769	['Apple5', 'Apple4E10', 'Banana', 'apple']
	770	>>> sorted(a, key=lambda x: natsort_key(groupletters(x), as_float=True))
	771	['Apple4E10', 'Apple5', 'apple', 'Banana']
	772	>>> groupletters('Apple4E10')
	773	'aAppppllee44eE1100'
	774
	775	We messed up the numbers! Looks like :func:`groupletters` needs to be applied
	776	after the strings are broken into their components. I'm not going to show
	777	how this is done here, but basically it requires applying the function in
	778	the ``else:`` block of :func:`coerce_to_int`/:func:`coerce_to_float`.
	779
	780	.. code-block:: python
	781
	782	>>> better_groupletters = natsort_keygen(alg=ns.GROUPLETTERS \| ns.REAL)
	783	>>> better_groupletters('Apple4E10')
	784	('aAppppllee', 40000000000.0)
	785	>>> sorted(a, key=better_groupletters)
	786	['Apple5', 'Apple4E10', 'apple', 'Banana']
	787
	788	Of course, applying both LOWERCASEFIRST and GROUPLETTERS is just
	789	a matter of turning on both functions.
	790
	791	Basic Unicode Support
	792	+++++++++++++++++++++
	793
	794	Unicode is hard and complicated. Here's an example.
	795
	796	.. code-block:: python
	797
	798	>>> b = [b'\x66', b'\x65', b'\xc3\xa9', b'\x65\xcc\x81', b'\x61', b'\x7a']
	799	>>> a = [x.decode('utf8') for x in b]
	800	>>> a # doctest: +SKIP
	801	['f', 'e', 'é', 'é', 'a', 'z']
	802	>>> sorted(a) # doctest: +SKIP
	803	['a', 'e', 'é', 'f', 'z', 'é']
	804
	805
	806	There is more than one way to represent the character 'é' in Unicode.
	807	In fact, many characters have multiple representations. This is a challenge
	808	because comparing the two representations would return ``False`` even though
	809	they look the same.
	810
	811	.. code-block:: python
	812
	813	>>> a[2] == a[3]
	814	False
	815
	816	Alas, since characters are compared based on the numerical value of their
	817	representation, sorting Unicode often gives unexpected results (like seeing
	818	'é' come both before and after 'z').
	819
	820	The original approach that :mod:`natsort` took with respect to non-ASCII
	821	Unicode characters was to say "just use
	822	the :mod:`locale` or :mod:`PyICU` library" and then cross its fingers
	823	and hope those libraries take care of it. As you will find in the following
	824	sections, that comes with its own baggage, and turned out to not always work anyway
	825	(see https://stackoverflow.com/q/45734562/1399279). A more robust approach is to
	826	handle the Unicode out-of-the-box without invoking a heavy-handed library
	827	like :mod:`locale` or :mod:`PyICU`. To do this, we must use normalization.
	828
	829	To fully understand Unicode normalization, `check out some official Unicode documentation`_.
	830	Just kidding... that's too much text. The following StackOverflow answers do
	831	a good job at explaining Unicode normalization in simple terms:
	832	https://stackoverflow.com/a/7934397/1399279 and
	833	https://stackoverflow.com/a/7931547/1399279. Put simply, normalization
	834	ensures that Unicode characters with multiple representations are in
	835	some canonical and consistent representation so that (for example) comparisons
	836	of the characters can be performed in a sane way. The following discussion
	837	assumes you at least read the StackOverflow answers.
	838
	839	Looking back at our 'é' example, we can see that the two versions were
	840	constructed with the byte strings ``b'\xc3\xa9'`` and ``b'\x65\xcc\x81'``.
	841	The former representation is actually
	842	`LATIN SMALL LETTER E WITH ACUTE <http://www.fileformat.info/info/unicode/char/e9/index.htm>`_
	843	and is a single character in the Unicode standard. This is known as the
	844	composed form and corresponds to the 'NFC' normalization scheme.
	845	The latter representation is actually the letter 'e' followed by
	846	`COMBINING ACUTE ACCENT <http://www.fileformat.info/info/unicode/char/0301/index.htm>`_
	847	and so is two characters in the Unicode standard. This is known as the
	848	decomposed form and corresponds to the 'NFD' normalization scheme.
	849	Since the first character in the decomposed form is actually the letter 'e',
	850	when compared to other ASCII characters it fits where you might expect.
	851	Unfortunately, all Unicode composed form characters come after the
	852	ASCII characters and so they always will be placed after 'z' when sorting.
	853
	854	It seems that most Unicode data is stored and shared in the composed form
	855	which makes it challenging to sort. This can be solved by normalizing all
	856	incoming Unicode data to the decomposed form ('NFD') and then sorting.
	857
	858	.. code-block:: python
	859
	860	>>> import unicodedata
	861	>>> c = [unicodedata.normalize('NFD', x) for x in a]
	862	>>> c # doctest: +SKIP
	863	['f', 'e', 'é', 'é', 'a', 'z']
	864	>>> sorted(c) # doctest: +SKIP
	865	['a', 'e', 'é', 'é', 'f', 'z']
	866
	867	Huzzah! Sane sorting without having to resort to :mod:`locale`!
	868
	869	Using Locale to Compare Strings
	870	+++++++++++++++++++++++++++++++
	871
	872	The :mod:`locale` module is actually pretty cool, and provides lowly
	873	spare-time programmers like myself a way to handle the daunting task
	874	of proper locale-dependent support of their libraries and utilities.
	875	Having said that, it can be a bit of a bear to get right,
	876	`although they do point out in the documentation that it will be painful to use`_.
	877	Aside from the caveats spelled out in that link, it turns out that just
	878	comparing strings with :mod:`locale` in a cross-platform and
	879	cross-python-version manner is not as straightforward as one might hope.
	880
	881	First, how to use :mod:`locale` to compare strings? It's actually
	882	pretty straightforward. Simply run the input through the :mod:`locale`
	883	transformation function :func:`locale.strxfrm`.
	884
	885	.. code-block:: python
	886
	887	>>> import locale, sys
	888	>>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
	889	'en_US.UTF-8'
	890	>>> a = ['a', 'b', 'ä']
	891	>>> sorted(a)
	892	['a', 'b', 'ä']
	893	>>> # The below fails on OSX, so don't run doctest on darwin.
	894	>>> is_osx = sys.platform == 'darwin'
	895	>>> sorted(a, key=locale.strxfrm) if not is_osx else ['a', 'ä', 'b']
	896	['a', 'ä', 'b']
	897	>>>
	898	>>> a = ['apple', 'Banana', 'banana', 'Apple']
	899	>>> sorted(a, key=locale.strxfrm) if not is_osx else ['apple', 'Apple', 'banana', 'Banana']
	900	['apple', 'Apple', 'banana', 'Banana']
	901
	902	It turns out that locale-aware sorting groups letters in the same
	903	way as turning on GROUPLETTERS and LOWERCASEFIRST.
	904	The trick is that you have to apply :func:`locale.strxfrm` only to non-numeric
	905	characters; otherwise, numbers won't be parsed properly. Therefore, it must
	906	be applied as part of the :func:`coerce_to_int`/:func:`coerce_to_float`
	907	functions in a manner similar to :func:`groupletters`.
	908
	909	As you might have guessed, there is a small problem.
	910	It turns out that there is a bug in the legacy Python implementation of
	911	:func:`locale.strxfrm` that causes it to outright fail for :func:`unicode`
	912	input (https://bugs.python.org/issue2481). :func:`locale.strcoll` works,
	913	but is intended for use with ``cmp``, which does not exist in current Python
	914	implementations. Luckily, the :func:`functools.cmp_to_key` function
	915	makes :func:`locale.strcoll` behave like :func:`locale.strxfrm` (that is, of course,
	916	unless you are on Python 2.6 where :func:`functools.cmp_to_key` doesn't exist,
	917	in which case you simply copy-paste the implementation from Python 2.7
	918	directly into your code ☹).
	919
	920	Handling Broken Locale On OSX
	921	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	922
	923	But what if the underlying locale implementation that :mod:`locale`
	924	relies upon is simply broken? It turns out that the locale library on
	925	OSX (and other BSD systems) is broken (and for some reason has never been
	926	fixed?), and so :mod:`locale` does not work as expected.
	927
	928	How do I define "doesn't work as expected"?
	929
	930	.. code-block:: python
	931
	932	>>> a = ['apple', 'Banana', 'banana', 'Apple']
	933	>>> sorted(a)
	934	['Apple', 'Banana', 'apple', 'banana']
	935	>>>
	936	>>> sorted(a, key=locale.strxfrm) if is_osx else sorted(a)
	937	['Apple', 'Banana', 'apple', 'banana']
	938
	939	IT'S SORTING AS IF :func:`locale.strxfrm` WAS NEVER USED!! (and it's worse
	940	once non-ASCII characters get thrown into the mix.) I'm really not
	941	sure why this is considered OK for the OSX/BSD maintainers to not fix,
	942	but it's more than frustrating for poor developers who have been dragged
	943	into the locale game kicking and screaming. <deep breath>.
	944
	945	So, how to deal with this situation? There are two ways to do so.
	946
	947	#. Detect if :mod:`locale` is sorting incorrectly (i.e. ``dumb``) by seeing
	948	if ``'A'`` is sorted before ``'a'`` (incorrect) or not.
	949
	950	.. code-block:: python
	951
	952	>>> # This is genuinely the name of this function.
	953	>>> # See natsort.compat.locale.py
	954	>>> def dumb_sort():
	955	... return locale.strxfrm('A') < locale.strxfrm('a')
	956	...
	957
	958	If a ``dumb`` locale implementation is found, then automatically
	959	turn on LOWERCASEFIRST and GROUPLETTERS.
	960	#. Use an alternate library if installed. `ICU <http://site.icu-project.org/>`_
	961	is a great and powerful library that has a pretty decent Python port
	962	called (you guessed it) `PyICU <https://pypi.python.org/pypi/PyICU/>`_.
	963	If a user has this library installed on their computer, :mod:`natsort`
	964	chooses to use that instead of :mod:`locale`. With a little bit of
	965	planning, one can write a set of wrapper functions that call
	966	the correct library under the hood such that the business logic never
	967	has to know what library is being used (see `natsort.compat.locale.py`_).
	968
	969	Let me tell you, this little complication really makes a challenge of testing
	970	the code, since one must set up different environments on different operating
	971	systems in order to test all possible code paths. Not to mention that
	972	certain checks will fail for certain operating systems and environments
	973	so one must be diligent in either writing the tests not to fail, or ignoring
	974	those tests when on offending environments.
	975
	976	Handling Locale-Aware Numbers
	977	+++++++++++++++++++++++++++++
	978
	979	`Thousands separator support`_ is a problem that I knew would someday be
	980	requested but had decided to push off until a rainy day. One day it finally
	981	rained, and I decided to tackle the problem.
	982
	983	So what is the problem? Consider the number ``1,234,567`` (assuming the
	984	``','`` is the thousands separator). Try to run that through :func:`int`
	985	and you will get a :exc:`ValueError`. To handle this properly the thousands
	986	separators must be removed.
	987
	988	.. code-block:: python
	989
	990	>>> float('1,234,567'.replace(',', ''))
	991	1234567.0
	992
	993	What if, in our current locale, the thousands separator is ``'.'`` and
	994	the ``','`` is the decimal separator (like for the German locale de_DE)?
	995
	996	.. code-block:: python
	997
	998	>>> float('1.234.567'.replace('.', '').replace(',', '.'))
	999	1234567.0
	1000	>>> float('1.234.567,89'.replace('.', '').replace(',', '.'))
	1001	1234567.89
	1002
	1003	This is pretty much what :func:`locale.atoi` and :func:`locale.atof` do
	1004	under the hood. So what's the problem? Why doesn't :mod:`natsort` just
	1005	use this method under its hood?
	1006	Well, let's take a look at what would happen if we send some possible
	1007	:mod:`natsort` input through the above function:
	1008
	1009	.. code-block:: python
	1010
	1011	>>> natsort_key('1,234 apples, please.'.replace(',', ''))
	1012	('', 1234, ' apples please.')
	1013	>>> natsort_key('Sir, €1.234,50 please.'.replace('.', '').replace(',', '.'), as_float=True)
	1014	('Sir. €', 1234.5, ' please')
	1015
	1016	Any character matching the thousands separator was dropped, and anything
	1017	matching the decimal separator was changed to ``'.'``! If these characters
	1018	were critical to how your data was ordered, this would break :mod:`natsort`.
	1019
	1020	The first solution one might consider would be to first decompose the
	1021	input into sub-components (like we did for the GROUPLETTERS method
	1022	above) and then only apply these transformations on the number components.
	1023	This is a chicken-and-egg problem, though, because *we cannot appropriately
	1024	separate out the numbers because of the thousands separators and
	1025	non-'.' decimal separators* (well, at least not without making multiple
	1026	passes over the data which I do not consider to be a valid option).
	1027
	1028	Regular expressions to the rescue! With regular expressions, we can
	1029	remove the thousands separators and change the decimal separator only
	1030	when they are actually within a number. Once the input has been
	1031	pre-processed with this regular expression, all the infrastructure
	1032	shown previously will work.
	1033
	1034	Beware, these regular expressions will make your eyes bleed.
	1035
	1036	.. code-block:: python
	1037
	1038	>>> decimal = ',' # Assume German locale, so decimal separator is ','
	1039	>>> # Look-behind assertions cannot accept range modifiers, so instead of i.e.
	1040	>>> # (?<!\.[0-9]{1,3}) I have to repeat the look-behind for 1, 2, and 3.
	1041	>>> nodecimal = r'(?<!{dec}[0-9])(?<!{dec}[0-9]{{2}})(?<!{dec}[0-9]{{3}})'.format(dec=decimal)
	1042	>>> strip_thousands = r'''
	1043	... (?<=[0-9]{{1}}) # At least 1 number
	1044	... (?<![0-9]{{4}}) # No more than 3 numbers
	1045	... {nodecimal} # Cannot follow decimal
	1046	... {thou} # The thousands separator
	1047	... (?=[0-9]{{3}} # Three numbers must follow
	1048	... ([^0-9]\|$) # But a non-number after that
	1049	... )
	1050	... '''.format(nodecimal=nodecimal, thou='.') # Thousands separator is '.' in German locale.
	1051	...
	1052	>>> re.sub(strip_thousands, '', 'Sir, €1.234,50 please.', flags=re.X)
	1053	'Sir, €1234,50 please.'
	1054	>>>
	1055	>>> # The decimal separator must be either preceded by a number or
	1056	>>> # followed by a number. This option only needs to be performed in the
	1057	>>> # case when the decimal separator for the locale is not '.'.
	1058	>>> switch_decimal = r'(?<=[0-9]){decimal}\|{decimal}(?=[0-9])'
	1059	>>> switch_decimal = switch_decimal.format(decimal=decimal)
	1060	>>> re.sub(switch_decimal, '.', 'Sir, €1234,50 please.', flags=re.X)
	1061	'Sir, €1234.50 please.'
	1062	>>>
	1063	>>> natsort_key('Sir, €1234.50 please.', as_float=True)
	1064	('Sir, €', 1234.5, ' please.')
	1065
	1066	Final Thoughts
	1067	--------------
	1068
	1069	My hope is that users of :mod:`natsort` never have to think about or worry
	1070	about all the bookkeeping or any of the details described above, and that using
	1071	:mod:`natsort` seems to magically "just work". For those of you who
	1072	took the time to read this engineering description, I hope it has enlightened
	1073	you to some of the issues that can be encountered when code is released
	1074	into the wild and has to accept "real-world data", or to what happens
	1075	to developers who naïvely make bold assumptions that are counter to
	1076	what the rest of the world assumes.
	1077
	1078	.. rubric:: Footnotes
	1079
	1080	.. [#f1]
	1081	To anyone looking through the actual code, you will note that I don't
	1082	actually use :mod:`pathlib` to split the paths... I wrote my own version
	1083	to avoid adding an external dependency of :mod:`pathlib` on Python < 3.4.
	1084	.. [#f2]
	1085	*"But if you hadn't removed the leading empty string from re.split this
	1086	wouldn't have happened!!"* I can hear you saying. Well, that's true. I don't
	1087	have a great reason for having done that except that in an earlier
	1088	non-optimal incarnation of the algorithm I needed to do it, and it kind of
	1089	stuck, and it made other parts of the code easier if the assumption that
	1090	there were no empty strings was valid.
	1091	.. [#f3]
	1092	I'm not going to show how this is implemented in this document,
	1093	but if you are interested you can look at the code to
	1094	:func:`_sep_inserter` in `util.py`_.
	1095	.. [#f4]
	1096	Handling each of these is straightforward, but coupled with the rapidly
	1097	fracturing execution paths presented in :ref:`TL;DR 2 <tldr2>` one can imagine
	1098	this will get out of hand quickly. If you take a look at `natsort.py`_ and
	1099	`util.py`_ you can observe that to avoid this I take a more functional approach
	1100	to constructing the :mod:`natsort` algorithm as opposed to the procedural approach
	1101	illustrated in :ref:`TL;DR 1 <tldr1>` and :ref:`TL;DR 2 <tldr2>`.
	1102
	1103	.. _ASCII table: http://www.asciitable.com/
	1104	.. _getting sorting right is surprisingly hard: http://www.compciv.org/guides/python/fundamentals/sorting-collections-with-sorted/
	1105	.. _This astonished: https://github.com/SethMMorton/natsort/issues/19
	1106	.. _a lot: http://stackoverflow.com/questions/29548742/python-natsort-sort-strings-recursively
	1107	.. _of people: http://stackoverflow.com/questions/24045348/sort-set-of-numbers-in-the-form-xx-yy-in-python
	1108	.. _and some people aren't very nice when they are astonished:
	1109	https://github.com/xolox/python-naturalsort/blob/ed3e6b6ffaca3bdea3b76e08acbb8bd2a5fee463/README.rst#why-another-natsort-module
	1110	.. _fastnumbers: https://github.com/SethMMorton/fastnumbers
	1111	.. _as part of my testing: https://github.com/SethMMorton/natsort/blob/master/test_natsort/slow_splitters.py
	1112	.. _this one for coercion: http://stackoverflow.com/questions/736043/checking-if-a-string-can-be-converted-to-float-in-python
	1113	.. _this one for checking: http://stackoverflow.com/questions/354038/how-do-i-check-if-a-string-is-a-number-float-in-python
	1114	.. _most natural sort solutions for python on Stack Overflow: http://stackoverflow.com/q/4836710/1399279
	1115	.. _80%/20%: https://en.wikipedia.org/wiki/Pareto_principle
	1116	.. _The first major special case I encountered was sorting filesystem paths: https://github.com/SethMMorton/natsort/issues/3
	1117	.. _The second major special case I encountered was sorting of different types: https://github.com/SethMMorton/natsort/issues/7
	1118	.. _A rather unexpected special case I encountered was sorting collections containing NaN:
	1119	https://github.com/SethMMorton/natsort/issues/27
	1120	.. _Path.parts: https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.parts
	1121	.. _Path.suffixes: https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffixes
	1122	.. _Path.stem: https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem
	1123	.. _It's hard to compare floating point numbers: http://www.drdobbs.com/cpp/its-hard-to-compare-floating-point-numbe/240149806
	1124	.. _caught a bit off guard when the request was initially made: https://github.com/SethMMorton/natsort/issues/14
	1125	.. _at the code: https://github.com/SethMMorton/natsort/tree/master/natsort
	1126	.. _natsort.py: https://github.com/SethMMorton/natsort/blob/master/natsort/natsort.py
	1127	.. _util.py: https://github.com/SethMMorton/natsort/blob/master/natsort/util.py
	1128	.. _although they do point out in the documentation that it will be painful to use:
	1129	https://docs.python.org/3/library/locale.html#background-details-hints-tips-and-caveats
	1130	.. _natsort.compat.locale.py: https://github.com/SethMMorton/natsort/blob/master/natsort/compat/locale.py
	1131	.. _Thousands separator support: https://github.com/SethMMorton/natsort/issues/36
	1132	.. _really good: https://hypothesis.readthedocs.io/en/latest/
	1133	.. _testing strategy: http://doc.pytest.org/en/latest/
	1134	.. _check out some official Unicode documentation: http://unicode.org/reports/tr15/

-2

docs/source/index.rst less more

2	2	You can adapt this file completely to your liking, but it should at least
3	3	contain the root `toctree` directive.
4	4
5		natsort: Natural Sorting for Python
6		===================================
	5	natsort: Simple yet flexible natural sorting in Python.
	6	=======================================================
7	7
8	8	Contents:
9	9

12	12	:numbered:
13	13
14	14	intro.rst
	15	howitworks.rst
15	16	examples.rst
16	17	api.rst
17	18	shell.rst

+314

-121

docs/source/intro.rst less more

3	3	The :mod:`natsort` module
4	4	=========================
5	5
6		Natural sorting for python.
	6	Simple yet flexible natural sorting in Python.
7	7
8	8	- Source Code: https://github.com/SethMMorton/natsort
9		- Downloads: https://pypi.python.org/pypi/natsort
10		- Documentation: http://pythonhosted.org/natsort/
	9	- Downloads: https://pypi.org/project/natsort/
	10	- Documentation: http://natsort.readthedocs.io/
	11	- Optional Dependencies:
	12
	13	- `fastnumbers <https://pypi.org/project/fastnumbers>`_ >= 2.0.0
	14	- `PyICU <https://pypi.org/project/PyICU>`_ >= 1.0.0
	15
	16	:mod:`natsort` is a general utility for sorting lists naturally; the definition
	17	of "naturally" is not well-defined, but the most common definition is that numbers
	18	contained within the string should be sorted as numbers and not as you would
	19	other characters. If you need to present sorted output to a user, you probably
	20	want to sort it naturally.
11	21
12	22	:mod:`natsort` was initially created for sorting scientific output filenames that
13		contained floating point numbers in the names. There was a serious lack of
	23	contained signed floating point numbers in the names. There was a lack of
14	24	algorithms out there that could perform a natural sort on `floats` but
15	25	plenty for `ints`; check out
16	26	`this StackOverflow question <http://stackoverflow.com/q/4836710/1399279>`_

18	28	`this ActiveState forum <http://code.activestate.com/recipes/285264-natural-string-sorting/>`_,
19	29	and of course `this great article on natural sorting <http://blog.codinghorror.com/sorting-for-humans-natural-sort-order/>`_
20	30	from CodingHorror.com for examples of what I mean.
21		:mod:`natsort` was created to fill in this gap. It has since grown
22		and can now sort version numbers (which seems to be the
23		most common use case based on user feedback) as well as some other nice features.
	31	:mod:`natsort` was created to fill in this gap, but has since expanded to handle
	32	just about any definition of a number, as well as other sorting customizations.
24	33
25	34	Quick Description
26	35	-----------------
27	36
28	37	When you try to sort a list of strings that contain numbers, the normal python
29	38	sort algorithm sorts lexicographically, so you might not get the results that you
30		expect::
31
32		>>> a = ['a2', 'a9', 'a1', 'a4', 'a10']
	39	expect:
	40
	41	.. code-block:: python
	42
	43	>>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
33	44	>>> sorted(a)
34		['a1', 'a10', 'a2', 'a4', 'a9']
	45	['1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '2 ft 7 in', '7 ft 6 in']
35	46
36	47	Notice that it has the order ('1', '10', '2') - this is because the list is
37	48	being sorted in lexicographical order, which sorts numbers like you would
38	49	letters (i.e. 'b', 'ba', 'c').
39	50
40	51	:mod:`natsort` provides a function :func:`~natsorted` that helps sort lists
41		"naturally", either as real numbers (i.e. signed/unsigned floats or ints),
42		or as versions. Using :func:`~natsorted` is simple::
	52	"naturally" ("naturally" is rather ill-defined, but in general it means
	53	sorting based on meaning and not computer code point).
	54	Using :func:`~natsorted` is simple:
	55
	56	.. code-block:: python
43	57
44	58	>>> from natsort import natsorted
45		>>> a = ['a2', 'a9', 'a1', 'a4', 'a10']
46		>>> natsorted(a)
47		['a1', 'a2', 'a4', 'a9', 'a10']
	59	>>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
	60	>>> natsorted(a)
	61	['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']
48	62
49	63	:func:`~natsorted` identifies numbers anywhere in a string and sorts them
50		naturally.
51
52		Sorting versions is handled properly by default (as of :mod:`natsort` version >= 4.0.0):
	64	naturally. Below are some other things you can do with :mod:`natsort`
	65	(please see the :ref:`examples` for a quick start guide, or the :ref:`api`
	66	for more details).
	67
	68	.. note::
	69
	70	:func:`~natsorted` is designed to be a drop-in replacement for the built-in
	71	:func:`sorted` function. Like :func:`sorted`, :func:`~natsorted`
	72	`does not sort in-place`. To sort a list and assign the output to the
	73	same variable, you must explicitly assign the output to a variable:
	74
	75	.. code-block:: python
	76
	77	>>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
	78	>>> natsorted(a)
	79	['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']
	80	>>> print(a) # 'a' was not sorted; "natsorted" simply returned a sorted list
	81	['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
	82	>>> a = natsorted(a) # Now 'a' will be sorted because the sorted list was assigned to 'a'
	83	>>> print(a)
	84	['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']
	85
	86	Please see `Generating a Reusable Sorting Key and Sorting In-Place`_ for
	87	an alternate way to sort in-place naturally.
	88
	89	Examples
	90	--------
	91
	92	Sorting Versions
	93	++++++++++++++++
	94
	95	This is handled properly by default (as of :mod:`natsort` version >= 4.0.0):
53	96
54	97	.. code-block:: python
55	98

60	103	If you need to sort release candidates, please see :ref:`rc_sorting` for
61	104	a useful hack.
62	105
63		You can also perform locale-aware sorting (or "human sorting"), where the
64		non-numeric characters are ordered based on their meaning, not on their
65		ordinal value; this can be achieved with the :func:`~humansorted` function:
66
67		.. code-block:: python
68
69		>>> a = ['Apple', 'Banana', 'apple', 'banana']
70		>>> natsorted(a)
71		['Apple', 'Banana', 'apple', 'banana']
	106	Sorting by Real Numbers (i.e. Signed Floats)
	107	++++++++++++++++++++++++++++++++++++++++++++
	108
	109	This is useful in scientific data analysis and was
	110	the default behavior of :func:`~natsorted` for :mod:`natsort`
	111	version < 4.0.0. Use the :func:`~realsorted` function:
	112
	113	.. code-block:: python
	114
	115	>>> from natsort import realsorted, ns
	116	>>> # Note that when interpreting as signed floats, the below numbers are
	117	>>> # +5.10, -3.00, +5.30, +2.00
	118	>>> a = ['position5.10.data', 'position-3.data', 'position5.3.data', 'position2.data']
	119	>>> natsorted(a)
	120	['position2.data', 'position5.3.data', 'position5.10.data', 'position-3.data']
	121	>>> natsorted(a, alg=ns.REAL)
	122	['position-3.data', 'position2.data', 'position5.10.data', 'position5.3.data']
	123	>>> realsorted(a) # shortcut for natsorted with alg=ns.REAL
	124	['position-3.data', 'position2.data', 'position5.10.data', 'position5.3.data']
	125
	126	Locale-Aware Sorting (or "Human Sorting")
	127	+++++++++++++++++++++++++++++++++++++++++
	128
	129	This is where the non-numeric characters are ordered based on their meaning,
	130	not on their ordinal value, and a locale-dependent thousands separator and decimal
	131	separator are accounted for in the number.
	132	This can be achieved with the :func:`~humansorted` function:
	133
	134	.. code-block:: python
	135
	136	>>> a = ['Apple', 'apple15', 'Banana', 'apple14,689', 'banana']
	137	>>> natsorted(a)
	138	['Apple', 'Banana', 'apple14,689', 'apple15', 'banana']
72	139	>>> import locale
73	140	>>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
74	141	'en_US.UTF-8'
	142	>>> natsorted(a, alg=ns.LOCALE)
	143	['apple15', 'apple14,689', 'Apple', 'banana', 'Banana']
75	144	>>> from natsort import humansorted
76	145	>>> humansorted(a)
77		['apple', 'Apple', 'banana', 'Banana']
	146	['apple15', 'apple14,689', 'Apple', 'banana', 'Banana']
78	147
79	148	You may find you need to explicitly set the locale to get this to work
80	149	(as shown in the example).
81		Please see :ref:`bug_note` and the Installation section
	150	Please see :ref:`locale_issues` and the Installation section
82	151	below before using the :func:`~humansorted` function.
83	152
84		You can sort signed floats (i.e. real numbers) using the :func:`~realsorted`;
85		this is useful in scientific data analysis. This was the default behavior of
86		:func:`~natsorted` for :mod:`natsort` version < 4.0.0:
87
88		.. code-block:: python
89
90		>>> from natsort import realsorted
91		>>> a = ['num5.10', 'num-3', 'num5.3', 'num2']
92		>>> natsorted(a)
93		['num2', 'num5.3', 'num5.10', 'num-3']
94		>>> realsorted(a)
95		['num-3', 'num2', 'num5.10', 'num5.3']
	153	Further Customizing Natsort
	154	+++++++++++++++++++++++++++
	155
	156	If you need to combine multiple algorithm modifiers (such as ``ns.REAL``,
	157	``ns.LOCALE``, and ``ns.IGNORECASE``), you can combine the options using the
	158	bitwise OR operator (``|``). For example,
	159
	160	.. code-block:: python
	161
	162	>>> a = ['Apple', 'apple15', 'Banana', 'apple14,689', 'banana']
	163	>>> natsorted(a, alg=ns.REAL \| ns.LOCALE \| ns.IGNORECASE)
	164	['Apple', 'apple15', 'apple14,689', 'Banana', 'banana']
	165	>>> # The ns enum provides long and short forms for each option.
	166	>>> ns.LOCALE == ns.L
	167	True
	168	>>> # You can also customize the convenience functions, too.
	169	>>> natsorted(a, alg=ns.REAL \| ns.LOCALE \| ns.IGNORECASE) == realsorted(a, alg=ns.L \| ns.IC)
	170	True
	171	>>> natsorted(a, alg=ns.REAL \| ns.LOCALE \| ns.IGNORECASE) == humansorted(a, alg=ns.R \| ns.IC)
	172	True
	173
	174	All of the available customizations can be found in the documentation for
	175	the :class:`~natsort.ns` enum.
	176
	177	You can also add your own custom transformation functions with the ``key`` argument.
	178	These can be used with ``alg`` if you wish:
	179
	180	.. code-block:: python
	181
	182	>>> a = ['apple2.50', '2.3apple']
	183	>>> natsorted(a, key=lambda x: x.replace('apple', ''), alg=ns.REAL)
	184	['2.3apple', 'apple2.50']
	185
	186	Sorting Mixed Types
	187	+++++++++++++++++++
96	188
97	189	You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types
98		when you sort::
	190	when you sort:
	191
	192	.. code-block:: python
99	193
100	194	>>> a = ['4.5', 6, 2.0, '5', 'a']
101	195	>>> natsorted(a)

103	197	>>> # On Python 2, sorted(a) would return [2.0, 6, '4.5', '5', 'a']
104	198	>>> # On Python 3, sorted(a) would raise an "unorderable types" TypeError
105	199
	200	Handling Bytes on Python 3
	201	++++++++++++++++++++++++++
	202
106	203	:mod:`natsort` does not officially support the `bytes` type on Python 3, but
107		convenience functions are provided that help you decode to `str` first::
	204	convenience functions are provided that help you decode to `str` first:
	205
	206	.. code-block:: python
108	207
109	208	>>> from natsort import as_utf8
110	209	>>> a = [b'a', 14.0, 'b']

118	217	>>> natsorted(a, key=as_utf8) == [b'a5', b'a6', b'a40', b'a56']
119	218	True
120	219
121		The natsort algorithm does other fancy things like
	220	Generating a Reusable Sorting Key and Sorting In-Place
	221	++++++++++++++++++++++++++++++++++++++++++++++++++++++
	222
	223	Under the hood, :func:`~natsorted` works by generating a custom sorting
	224	key using :func:`~natsort_keygen` and then passes that to the built-in
	225	:func:`sorted`. You can use the :func:`~natsort_keygen` function yourself to
	226	generate a custom sorting key to sort in-place using the :meth:`list.sort`
	227	method.
	228
	229	.. code-block:: python
	230
	231	>>> from natsort import natsort_keygen
	232	>>> natsort_key = natsort_keygen()
	233	>>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']
	234	>>> natsorted(a) == sorted(a, key=natsort_key)
	235	True
	236	>>> a.sort(key=natsort_key)
	237	>>> a
	238	['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in']
	239
	240	All of the algorithm customizations mentioned in the `Further Customizing Natsort`_
	241	section can also be applied to :func:`~natsort_keygen` through the alg keyword option.
	242
	243	Other Useful Things
	244	+++++++++++++++++++
122	245
123	246	- recursively descend into lists of lists
124		- control the case-sensitivity
125		- sort file paths correctly
126		- allow custom sorting keys
127		- exposes a natsort_key generator to pass to list.sort
128
129		Please see the :ref:`examples` for a quick start guide, or the :ref:`api`
130		for more details.
	247	- automatic unicode normalization of input data
	248	- controlling the case-sensitivity (see :ref:`case_sort`)
	249	- sorting file paths correctly (see :ref:`path_sort`)
	250	- allow custom sorting keys (see :ref:`custom_sort`)
	251
	252	FAQ
	253	---
	254
	255	How do I debug :func:`~natsorted`?
	256	The best way to debug :func:`~natsorted` is to generate a key using :func:`~natsort_keygen`
	257	with the same options being passed to :func:`~natsorted`. One can take a look at
	258	exactly what is being done with their input using this key - it is highly recommended
	259	to `look at this issue describing how to debug <https://github.com/SethMMorton/natsort/issues/13#issuecomment-50422375>`_
	260	and also to review the
	261	`How Does Natsort Work? <http://natsort.readthedocs.io/en/master/howitworks.html>`_
	262	page for why :mod:`natsort` is doing that to your data.
	263
	264	If you are trying to sort custom classes and running into trouble, please take a look at
	265	https://github.com/SethMMorton/natsort/issues/60. In short,
	266	custom classes are not likely to be sorted correctly if one relies
	267	on the behavior of ``__lt__`` and the other rich comparison operators in their
	268	custom class - it is better to use a ``key`` function with :mod:`natsort`, or
	269	use the :mod:`natsort` key as part of your rich comparison operator definition.
	270
	271	How does :mod:`natsort` work?
	272	If you don't want to read `How Does Natsort Work? <http://natsort.readthedocs.io/en/master/howitworks.html>`_,
	273	here is a quick primer.
	274
	275	:mod:`natsort` provides a `key function <https://docs.python.org/3/howto/sorting.html#key-functions>`_
	276	that can be passed to `list.sort() <https://docs.python.org/3/library/stdtypes.html#list.sort>`_
	277	or `sorted() <https://docs.python.org/3/library/functions.html#sorted>`_ in order to
	278	modify the default sorting behavior. This key is generated on-demand with the
	279	key generator :func:`natsort.natsort_keygen`. :func:`natsort.natsorted` is essentially
	280	a wrapper for the following code:
	281
	282	.. code-block:: python
	283
	284	>>> from natsort import natsort_keygen
	285	>>> natsort_key = natsort_keygen()
	286	>>> sorted(['1', '10', '2'], key=natsort_key)
	287	['1', '2', '10']
	288
	289	Users can further customize :mod:`natsort` sorting behavior with the ``key``
	290	and/or ``alg`` options (see details in the `Further Customizing Natsort`_
	291	section).
	292
	293	The key generated by :func:`natsort.natsort_keygen` always returns a :class:`tuple`. It
	294	does so in the following way (some details omitted for clarity):
	295
	296	1. Assume the input is a string, and attempt to split it into numbers and
	297	non-numbers using regular expressions. Numbers are then converted into
	298	either :class:`int` or :class:`float`.
	299	2. If the above fails because the input is not a string, assume the input
	300	is some other sequence (e.g. :class:`list` or :class:`tuple`), and recursively
	301	apply the key to each element of the sequence.
	302	3. If the above fails because the input is not iterable, assume the input
	303	is an :class:`int` or :class:`float`, and just return the input in a :class:`tuple`.
	304
	305	Because a :class:`tuple` is always returned, a :exc:`TypeError` should not be common
	306	unless one tries to do something odd like sort an :class:`int` against a :class:`list`.
	307
	308	:mod:`natsort` gave me results I didn't expect, and it's a terrible library!
	309	Did you try to debug using the above advice? If so, and you still cannot figure out
	310	the error, then please `file an issue <https://github.com/SethMMorton/natsort/issues/new>`_.
	311
	312	Shell script
	313	------------
	314
	315	:mod:`natsort` comes with a shell script called :mod:`natsort`, or can also be called
	316	from the command line with ``python -m natsort``.
	317
	318	Requirements
	319	------------
	320
	321	:mod:`natsort` requires Python version 2.6 or greater or Python 3.3 or greater.
	322	It may run on (but is not tested against) Python 3.2.
	323
	324	Optional Dependencies
	325	---------------------
	326
	327	fastnumbers
	328	+++++++++++
	329
	330	The most efficient sorting can occur if you install the
	331	`fastnumbers <https://pypi.org/project/fastnumbers>`_ package
	332	(version >=0.7.1); it helps with the string to number conversions.
	333	:mod:`natsort` will still run (efficiently) without the package, but if you need
	334	to squeeze out that extra juice it is recommended you include this as a dependency.
	335	:mod:`natsort` will not require (or check) that
	336	`fastnumbers <https://pypi.org/project/fastnumbers>`_ is installed
	337	at installation.
	338
	339	PyICU
	340	+++++
	341
	342	It is recommended that you install `PyICU <https://pypi.org/project/PyICU>`_
	343	if you wish to sort in a locale-dependent manner, see
	344	http://natsort.readthedocs.io/en/master/locale_issues.html for an explanation why.
131	345
132	346	Installation
133	347	------------
134	348
135		Installation of :mod:`natsort` is ultra-easy. Simply execute from the
136		command line::
137
138		easy_install natsort
139
140		or, if you have ``pip`` (preferred over ``easy_install``)::
141
142		pip install natsort
143
144		Both of the above commands will download the source for you.
145
146		You can also download the source from http://pypi.python.org/pypi/natsort,
147		or browse the git repository at https://github.com/SethMMorton/natsort.
148
149		If you choose to install from source, you can unzip the source archive and
150		enter the directory, and type::
151
152		python setup.py install
153
154		If you wish to run the unit tests, enter::
155
156		python setup.py test
157
158		If you want to build this documentation, enter::
159
160		python setup.py build_sphinx
161
162		:mod:`natsort` requires Python version 2.7 or greater or Python 3.2 or greater.
163
164		The most efficient sorting can occur if you install the
165		`fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ package (it helps
166		with the string to number conversions.) ``natsort`` will still run (efficiently)
167		without the package, but if you need to squeeze out that extra juice it is
168		recommended you include this as a dependency. ``natsort`` will not require (or
169		check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed.
170
171		On BSD-based systems (this includes Mac OS X), the underlying ``locale`` library
172		can be buggy (please see http://bugs.python.org/issue23195); ``locale`` is
173		used for the ``ns.LOCALE`` option and ``humansorted`` function. To remedy this,
174		one can
175
176		1. Use "\.ISO8859-1" locale (i.e. 'en_US.ISO8859-1') rather than "\.UTF-8"
177		locale. These locales do not suffer from as many problems as "UTF-8"
178		and thus should give expected results.
179		2. Use `PyICU <https://pypi.python.org/pypi/PyICU>`_. If
180		`PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed, ``natsort``
181		will use it under the hood; this will give more
182		reliable cross-platform results in the long run. ``natsort`` will not
183		require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_
184		is installed at installation. Please visit
185		https://github.com/SethMMorton/natsort/issues/21 for more details and
186		how to install on Mac OS X. Please note that using
187		`PyICU <https://pypi.python.org/pypi/PyICU>`_ is the only way to
188		guarantee correct results for all input on BSD-based systems, since
189		every other suggestion is a workaround.
190		3. Do nothing. As of ``natsort`` version 4.0.0, ``natsort`` is configured
191		to compensate for a broken ``locale`` library in terms of case-handling;
192		if you do not need to be able to properly handle non-ASCII characters
193		then this may be the best option for you.
194
195		Note that the above solutions should not be required for Windows or
196		Linux since in Linux-based systems and Windows systems ``locale`` should work
197		just fine.
198
199		:mod:`natsort` comes with a shell script called :mod:`natsort`, or can also be called
200		from the command line with ``python -m natsort``. The command line script is
201		only installed onto your ``PATH`` if you don't install via a wheel. There is
202		apparently a known bug with the wheel installation process that will not create
203		entry points.
	349	Use ``pip``!
	350
	351	.. code-block:: sh
	352
	353	$ pip install natsort
	354
	355	If you want to install the `Optional Dependencies`_, you can use the
	356	`"extras" notation <https://packaging.python.org/tutorials/installing-packages/#installing-setuptools-extras>`_
	357	at installation time to install those dependencies as well - use ``fast`` for
	358	`fastnumbers <https://pypi.org/project/fastnumbers>`_ and ``icu`` for
	359	`PyICU <https://pypi.org/project/PyICU>`_.
	360
	361	.. code-block:: sh
	362
	363	# Install both optional dependencies.
	364	$ pip install natsort[fast,icu]
	365	# Install just fastnumbers
	366	$ pip install natsort[fast]
	367
	368	How to Run Tests
	369	----------------
	370
	371	Please note that :mod:`natsort` is NOT set-up to support ``python setup.py test``.
	372
	373	The recommended way to run tests is with `tox <https://tox.readthedocs.io/en/latest/>`_.
	374	After installing ``tox``, running tests is as simple as executing the following in the
	375	``natsort`` directory:
	376
	377	.. code-block:: sh
	378
	379	$ tox
	380
	381	``tox`` will create a virtual environment for your tests and install all the
	382	needed testing requirements for you. You can specify a particular python version
	383	with the ``-e`` flag, e.g. ``tox -e py36``.
	384
	385	If you do not wish to use ``tox``, you can install the testing dependencies and run the
	386	tests manually using `pytest <https://docs.pytest.org/en/latest/>`_ - ``natsort``
	387	contains a ``Pipfile`` for use with `pipenv <https://github.com/pypa/pipenv>`_ that
	388	makes it easy for you to install the testing dependencies:
	389
	390	.. code-block:: sh
	391
	392	$ pipenv install --skip-lock --dev
	393	$ pipenv run python -m pytest
	394
	395	Note that above I invoked ``python -m pytest`` instead of just ``pytest`` - this is because
	396	`the former puts the CWD on sys.path <https://docs.pytest.org/en/latest/usage.html#calling-pytest-through-python-m-pytest>`_.⏎

+96

-0

docs/source/locale_issues.rst less more

	0	.. default-domain:: py
	1	.. currentmodule:: natsort
	2
	3	.. _locale_issues:
	4
	5	Possible Issues with :func:`~natsort.humansorted` or ``ns.LOCALE``
	6	==================================================================
	7
	8	Being Locale-Aware Means Both Numbers and Non-Numbers
	9	-----------------------------------------------------
	10
	11	In addition to modifying how characters are sorted, ``ns.LOCALE`` will take into
	12	account locale-dependent thousands separators (and locale-dependent decimal
	13	separators if ``ns.FLOAT`` is enabled). This means that if you are in a
	14	locale that uses commas as the thousands separator, a number like
	15	``123,456`` will be interpreted as ``123456``. If this is not what you want,
	16	you may consider using ``ns.LOCALEALPHA`` which will only enable locale-aware
	17	sorting for non-numbers (similarly, ``ns.LOCALENUM`` enables locale-aware
	18	sorting only for numbers).
	19
	20	Regenerate Key With :func:`~natsort.natsort_keygen` After Changing Locale
	21	-------------------------------------------------------------------------
	22
	23	When :func:`~natsort.natsort_keygen` is called it returns a key function that
	24	hard-codes the provided settings. This means that the key returned when
	25	``ns.LOCALE`` is used contains the settings specified by the locale
	26	loaded at the time the key is generated. If you change the locale,
	27	you should regenerate the key to account for the new locale.
	28
	29	Corollary: Do Not Reuse :func:`~natsort.natsort_keygen` After Changing Locale
	30	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
	31
	32	If you change locale, the old function will not work as expected.
	33	The `locale <https://docs.python.org/3.5/library/locale.html>`_ library works
	34	with a global state. When :func:`~natsort.natsort_keygen` is called it does the
	35	best job that it can to make the returned function as static as possible and
	36	independent of the global state, but the
	37	`strxfrm <https://docs.python.org/3.5/library/locale.html#locale.strxfrm>`_
	38	function must access this global state to work; therefore, if you change
	39	locale and use ``ns.LOCALE`` then you should discard the old key.
	40
	41	.. note:: If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ then you
	42	may be able to reuse keys after changing locale.
	43
	44	The `locale <https://docs.python.org/3.5/library/locale.html>`_ Module From the StdLib Has Issues
	45	-------------------------------------------------------------------------------------------------
	46
	47	:mod:`natsort` will use `PyICU <https://pypi.org/project/PyICU>`_ for
	48	:func:`~natsort.humansorted` or ``ns.LOCALE`` if it is installed. If not,
	49	it will fall back on the `locale <https://docs.python.org/3.5/library/locale.html>`_
	50	library from the Python stdlib. If you do not have
	51	`PyICU <https://pypi.org/project/PyICU>`_ installed, please keep the
	52	following known problems and issues in mind.
	53
	54	.. note:: Remember, if you have `PyICU <https://pypi.org/project/PyICU>`_
	55	installed you shouldn't need to worry about any of these.
	56
	57	Explicitly Set the Locale Before Using :func:`~natsort.humansorted` or ``ns.LOCALE``
	58	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
	59
	60	I have found that unless you explicitly set a locale, the sorted order may not
	61	be what you expect. Setting this is straightforward
	62	(in the below example I use 'en_US.UTF-8', but you should use your
	63	locale)::
	64
	65	>>> import locale
	66	>>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
	67	'en_US.UTF-8'
	68
	69	.. _bug_note:
	70
	71	`locale <https://docs.python.org/3.5/library/locale.html>`_ Is Broken on Mac OS X
	72	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
	73
	74	It's not Python's fault, but the OS... the locale library for BSD-based systems
	75	(of which Mac OS X is one) is broken. See the following links:
	76
	77	- http://stackoverflow.com/questions/3412933/python-not-sorting-unicode-properly-strcoll-doesnt-help
	78	- http://bugs.python.org/issue23195
	79	- https://github.com/SethMMorton/natsort/issues/21 (contains instructions on installing)
	80	- http://stackoverflow.com/questions/33459384/unicode-character-not-in-range-when-calling-locale-strxfrm
	81	- https://github.com/SethMMorton/natsort/issues/34
	82
	83	Of course, installing `PyICU <https://pypi.org/project/PyICU>`_ fixes this,
	84	but if you don't want to or cannot install this there is some hope.
	85
	86	1. As of ``natsort`` version 4.0.0, ``natsort`` is configured
	87	to compensate for a broken ``locale`` library. When sorting non-numbers
	88	it will handle case as you expect, but it will still not be able to
	89	comprehend non-ASCII characters properly. Additionally, it has
	90	a built-in lookup table of thousands separators that are incorrect
	91	on OS X/BSD (but it is possible it is not complete... please file an
	92	issue if you see it is not complete)
	93	2. Use "\*.ISO8859-1" locale (i.e. 'en_US.ISO8859-1') rather than "\*.UTF-8"
	94	locale. I have found that these have fewer issues than "UTF-8", but
	95	your mileage may vary.

-32

~~docs/source/solar/NEWS.txt~~ less more

0		News
1		====
2
3		1.3
4		---
5		* Release date: 2012-11-01.
6		* Source Code Pro is now used for code samples.
7		* Reduced font size of pre elements.
8		* Horizontal rule for header elements.
9		* HTML pre contents are now wrapped (no scrollbars).
10		* Changed permalink color from black to a lighter one.
11
12		1.2
13		---
14		* Release date: 2012-10-03.
15		* Style additional admonition levels.
16		* Increase padding for navigation links (minor).
17		* Add shadow for admonition items (minor).
18
19		1.1
20		---
21		* Release date: 2012-09-05.
22		* Add a new background.
23		* Revert font of headings to Open Sans Light.
24		* Darker color for h3 - h6.
25		* Removed dependency on solarized dark pygments style.
26		* Nice looking scrollbars for pre element.
27
28		1.0
29		---
30		* Release date: 2012-08-24.
31		* Initial release.

-28

~~docs/source/solar/README.rst~~ less more

0		Solar theme for Python Sphinx
1		=============================
2		Solar is an attempt to create a theme for Sphinx based on the `Solarized <http://ethanschoonover.com/solarized>`_ color scheme.
3
4		Preview
5		-------
6		http://vimalkumar.in/sphinx-themes/solar
7
8		Download
9		--------
10		Released versions are available from http://github.com/vkvn/sphinx-themes/downloads
11
12		Installation
13		------------
14		#. Extract the archive.
15		#. Modify ``conf.py`` of an existing Sphinx project or create new project using ``sphinx-quickstart``.
16		#. Change the ``html_theme`` parameter to ``solar``.
17		#. Change the ``html_theme_path`` to the location containing the extracted archive.
18
19		License
20		-------
21		`GNU General Public License <http://www.gnu.org/licenses/gpl.html>`_.
22
23		Credits
24		-------
25		Modified from the default Sphinx theme -- Sphinxdoc
26
27		Background pattern from http://subtlepatterns.com.

-32

~~docs/source/solar/layout.html~~ less more

0		{% extends "basic/layout.html" %}
1
2		{%- block doctype -%}
3		<!DOCTYPE html>
4		{%- endblock -%}
5
6		{%- block extrahead -%}
7		<link href='http://fonts.googleapis.com/css?family=Source+Code+Pro\|Open+Sans:300italic,400italic,700italic,400,300,700' rel='stylesheet' type='text/css'>
8		<link href="{{ pathto("_static/solarized-dark.css", 1) }}" rel="stylesheet">
9		{%- endblock -%}
10
11		{# put the sidebar before the body #}
12		{% block sidebar1 %}{{ sidebar() }}{% endblock %}
13		{% block sidebar2 %}{% endblock %}
14
15		{%- block footer %}
16		<div class="footer">
17		{%- if show_copyright %}
18		{%- if hasdoc('copyright') %}
19		{% trans path=pathto('copyright'), copyright=copyright\|e %}© <a href="{{ path }}">Copyright</a> {{ copyright }}.{% endtrans %}
20		{%- else %}
21		{% trans copyright=copyright\|e %}© Copyright {{ copyright }}.{% endtrans %}
22		{%- endif %}
23		{%- endif %}
24		{%- if last_updated %}
25		{% trans last_updated=last_updated\|e %}Last updated on {{ last_updated }}.{% endtrans %}
26		{%- endif %}
27		{%- if show_sphinx %}
28		{% trans sphinx_version=sphinx_version\|e %}Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> {{ sphinx_version }}.Theme by <a href="http://github.com/vkvn">vkvn</a>{% endtrans %}
29		{%- endif %}
30		</div>
31		{%- endblock %}

-344

~~docs/source/solar/static/solar.css~~ less more

0		/* solar.css
1		* Modified from sphinxdoc.css of the sphinxdoc theme.
2		*/
3
4		@import url("basic.css");
5
6		/* -- page layout ----------------------------------------------------------- */
7
8		body {
9		font-family: 'Open Sans', sans-serif;
10		font-size: 14px;
11		line-height: 150%;
12		text-align: center;
13		color: #002b36;
14		padding: 0;
15		margin: 0px 80px 0px 80px;
16		min-width: 740px;
17		-moz-box-shadow: 0px 0px 10px #93a1a1;
18		-webkit-box-shadow: 0px 0px 10px #93a1a1;
19		box-shadow: 0px 0px 10px #93a1a1;
20		background: url("subtle_dots.png") repeat;
21
22		}
23
24		div.document {
25		background-color: #fcfcfc;
26		text-align: left;
27		background-repeat: repeat-x;
28		}
29
30		div.bodywrapper {
31		margin: 0 240px 0 0;
32		border-right: 1px dotted #eee8d5;
33		}
34
35		div.body {
36		background-color: white;
37		margin: 0;
38		padding: 0.5em 20px 20px 20px;
39		}
40
41		div.related {
42		font-size: 1em;
43		background: #002b36;
44		color: #839496;
45		padding: 5px 0px;
46		}
47
48		div.related ul {
49		height: 2em;
50		margin: 2px;
51		}
52
53		div.related ul li {
54		margin: 0;
55		padding: 0;
56		height: 2em;
57		float: left;
58		}
59
60		div.related ul li.right {
61		float: right;
62		margin-right: 5px;
63		}
64
65		div.related ul li a {
66		margin: 0;
67		padding: 2px 5px;
68		line-height: 2em;
69		text-decoration: none;
70		color: #839496;
71		}
72
73		div.related ul li a:hover {
74		background-color: #073642;
75		-webkit-border-radius: 2px;
76		-moz-border-radius: 2px;
77		border-radius: 2px;
78		}
79
80		div.sphinxsidebarwrapper {
81		padding: 0;
82		}
83
84		div.sphinxsidebar {
85		margin: 0;
86		padding: 0.5em 15px 15px 0;
87		width: 210px;
88		float: right;
89		font-size: 0.9em;
90		text-align: left;
91		}
92
93		div.sphinxsidebar h3, div.sphinxsidebar h4 {
94		margin: 1em 0 0.5em 0;
95		font-size: 1em;
96		padding: 0.7em;
97		background-color: #eeeff1;
98		}
99
100		div.sphinxsidebar h3 a {
101		color: #2E3436;
102		}
103
104		div.sphinxsidebar ul {
105		padding-left: 1.5em;
106		margin-top: 7px;
107		padding: 0;
108		line-height: 150%;
109		color: #586e75;
110		}
111
112		div.sphinxsidebar ul ul {
113		margin-left: 20px;
114		}
115
116		div.sphinxsidebar input {
117		border: 1px solid #eee8d5;
118		}
119
120		div.footer {
121		background-color: #93a1a1;
122		color: #eee;
123		padding: 3px 8px 3px 0;
124		clear: both;
125		font-size: 0.8em;
126		text-align: right;
127		}
128
129		div.footer a {
130		color: #eee;
131		text-decoration: none;
132		}
133
134		/* -- body styles ----------------------------------------------------------- */
135
136		p {
137		margin: 0.8em 0 0.5em 0;
138		}
139
140		div.body a, div.sphinxsidebarwrapper a {
141		color: #268bd2;
142		text-decoration: none;
143		}
144
145		div.body a:hover, div.sphinxsidebarwrapper a:hover {
146		border-bottom: 1px solid #268bd2;
147		}
148
149		h1, h2, h3, h4, h5, h6 {
150		font-family: "Open Sans", sans-serif;
151		font-weight: 300;
152		}
153
154		h1 {
155		margin: 0;
156		padding: 0.7em 0 0.3em 0;
157		line-height: 1.2em;
158		color: #002b36;
159		text-shadow: #eee 0.1em 0.1em 0.1em;
160		}
161
162		h2 {
163		margin: 1.3em 0 0.2em 0;
164		padding: 0 0 10px 0;
165		color: #073642;
166		border-bottom: 1px solid #eee;
167		}
168
169		h3 {
170		margin: 1em 0 -0.3em 0;
171		padding-bottom: 5px;
172		}
173
174		h3, h4, h5, h6 {
175		color: #073642;
176		border-bottom: 1px dotted #eee;
177		}
178
179		div.body h1 a, div.body h2 a, div.body h3 a, div.body h4 a, div.body h5 a, div.body h6 a {
180		color: #657B83!important;
181		}
182
183		h1 a.anchor, h2 a.anchor, h3 a.anchor, h4 a.anchor, h5 a.anchor, h6 a.anchor {
184		display: none;
185		margin: 0 0 0 0.3em;
186		padding: 0 0.2em 0 0.2em;
187		color: #aaa!important;
188		}
189
190		h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor,
191		h5:hover a.anchor, h6:hover a.anchor {
192		display: inline;
193		}
194
195		h1 a.anchor:hover, h2 a.anchor:hover, h3 a.anchor:hover, h4 a.anchor:hover,
196		h5 a.anchor:hover, h6 a.anchor:hover {
197		color: #777;
198		background-color: #eee;
199		}
200
201		a.headerlink {
202		color: #c60f0f!important;
203		font-size: 1em;
204		margin-left: 6px;
205		padding: 0 4px 0 4px;
206		text-decoration: none!important;
207		}
208
209		a.headerlink:hover {
210		background-color: #ccc;
211		color: white!important;
212		}
213
214
215		cite, code, tt {
216		font-family: 'Source Code Pro', monospace;
217		font-size: 0.9em;
218		letter-spacing: 0.01em;
219		background-color: #eeeff2;
220		font-style: normal;
221		}
222
223		hr {
224		border: 1px solid #eee;
225		margin: 2em;
226		}
227
228		.highlight {
229		-webkit-border-radius: 2px;
230		-moz-border-radius: 2px;
231		border-radius: 2px;
232		}
233
234		pre {
235		font-family: 'Source Code Pro', monospace;
236		font-style: normal;
237		font-size: 0.9em;
238		letter-spacing: 0.015em;
239		line-height: 120%;
240		padding: 0.7em;
241		white-space: pre-wrap; /* css-3 */
242		white-space: -moz-pre-wrap; /* Mozilla, since 1999 */
243		white-space: -pre-wrap; /* Opera 4-6 */
244		white-space: -o-pre-wrap; /* Opera 7 */
245		word-wrap: break-word; /* Internet Explorer 5.5+ */
246		}
247
248		pre a {
249		color: inherit;
250		text-decoration: underline;
251		}
252
253		td.linenos pre {
254		padding: 0.5em 0;
255		}
256
257		div.quotebar {
258		background-color: #f8f8f8;
259		max-width: 250px;
260		float: right;
261		padding: 2px 7px;
262		border: 1px solid #ccc;
263		}
264
265		div.topic {
266		background-color: #f8f8f8;
267		}
268
269		table {
270		border-collapse: collapse;
271		margin: 0 -0.5em 0 -0.5em;
272		}
273
274		table td, table th {
275		padding: 0.2em 0.5em 0.2em 0.5em;
276		}
277
278		div.admonition {
279		font-size: 0.9em;
280		margin: 1em 0 1em 0;
281		border: 1px solid #eee;
282		background-color: #f7f7f7;
283		padding: 0;
284		-moz-box-shadow: 0px 8px 6px -8px #93a1a1;
285		-webkit-box-shadow: 0px 8px 6px -8px #93a1a1;
286		box-shadow: 0px 8px 6px -8px #93a1a1;
287		}
288
289		div.admonition p {
290		margin: 0.5em 1em 0.5em 1em;
291		padding: 0.2em;
292		}
293
294		div.admonition pre {
295		margin: 0.4em 1em 0.4em 1em;
296		}
297
298		div.admonition p.admonition-title
299		{
300		margin: 0;
301		padding: 0.2em 0 0.2em 0.6em;
302		color: white;
303		border-bottom: 1px solid #eee8d5;
304		font-weight: bold;
305		background-color: #268bd2;
306		}
307
308		div.warning p.admonition-title,
309		div.important p.admonition-title {
310		background-color: #cb4b16;
311		}
312
313		div.hint p.admonition-title,
314		div.tip p.admonition-title {
315		background-color: #859900;
316		}
317
318		div.caution p.admonition-title,
319		div.attention p.admonition-title,
320		div.danger p.admonition-title,
321		div.error p.admonition-title {
322		background-color: #dc322f;
323		}
324
325		div.admonition ul, div.admonition ol {
326		margin: 0.1em 0.5em 0.5em 3em;
327		padding: 0;
328		}
329
330		div.versioninfo {
331		margin: 1em 0 0 0;
332		border: 1px solid #eee;
333		background-color: #DDEAF0;
334		padding: 8px;
335		line-height: 1.3em;
336		font-size: 0.9em;
337		}
338
339		div.viewcode-block:target {
340		background-color: #f4debf;
341		border-top: 1px solid #eee;
342		border-bottom: 1px solid #eee;
343		}

-84

~~docs/source/solar/static/solarized-dark.css~~ less more

0		/* solarized dark style for solar theme */
1
2		/* style pre scrollbar */
3		pre::-webkit-scrollbar, .highlight::-webkit-scrollbar {
4		height: 0.5em;
5		background: #073642;
6		}
7
8		pre::-webkit-scrollbar-thumb {
9		border-radius: 1em;
10		background: #93a1a1;
11		}
12
13		/* pygments style */
14		.highlight .hll { background-color: #ffffcc }
15		.highlight { background: #002B36!important; color: #93A1A1 }
16		.highlight .c { color: #586E75 } /* Comment */
17		.highlight .err { color: #93A1A1 } /* Error */
18		.highlight .g { color: #93A1A1 } /* Generic */
19		.highlight .k { color: #859900 } /* Keyword */
20		.highlight .l { color: #93A1A1 } /* Literal */
21		.highlight .n { color: #93A1A1 } /* Name */
22		.highlight .o { color: #859900 } /* Operator */
23		.highlight .x { color: #CB4B16 } /* Other */
24		.highlight .p { color: #93A1A1 } /* Punctuation */
25		.highlight .cm { color: #586E75 } /* Comment.Multiline */
26		.highlight .cp { color: #859900 } /* Comment.Preproc */
27		.highlight .c1 { color: #586E75 } /* Comment.Single */
28		.highlight .cs { color: #859900 } /* Comment.Special */
29		.highlight .gd { color: #2AA198 } /* Generic.Deleted */
30		.highlight .ge { color: #93A1A1; font-style: italic } /* Generic.Emph */
31		.highlight .gr { color: #DC322F } /* Generic.Error */
32		.highlight .gh { color: #CB4B16 } /* Generic.Heading */
33		.highlight .gi { color: #859900 } /* Generic.Inserted */
34		.highlight .go { color: #93A1A1 } /* Generic.Output */
35		.highlight .gp { color: #93A1A1 } /* Generic.Prompt */
36		.highlight .gs { color: #93A1A1; font-weight: bold } /* Generic.Strong */
37		.highlight .gu { color: #CB4B16 } /* Generic.Subheading */
38		.highlight .gt { color: #93A1A1 } /* Generic.Traceback */
39		.highlight .kc { color: #CB4B16 } /* Keyword.Constant */
40		.highlight .kd { color: #268BD2 } /* Keyword.Declaration */
41		.highlight .kn { color: #859900 } /* Keyword.Namespace */
42		.highlight .kp { color: #859900 } /* Keyword.Pseudo */
43		.highlight .kr { color: #268BD2 } /* Keyword.Reserved */
44		.highlight .kt { color: #DC322F } /* Keyword.Type */
45		.highlight .ld { color: #93A1A1 } /* Literal.Date */
46		.highlight .m { color: #2AA198 } /* Literal.Number */
47		.highlight .s { color: #2AA198 } /* Literal.String */
48		.highlight .na { color: #93A1A1 } /* Name.Attribute */
49		.highlight .nb { color: #B58900 } /* Name.Builtin */
50		.highlight .nc { color: #268BD2 } /* Name.Class */
51		.highlight .no { color: #CB4B16 } /* Name.Constant */
52		.highlight .nd { color: #268BD2 } /* Name.Decorator */
53		.highlight .ni { color: #CB4B16 } /* Name.Entity */
54		.highlight .ne { color: #CB4B16 } /* Name.Exception */
55		.highlight .nf { color: #268BD2 } /* Name.Function */
56		.highlight .nl { color: #93A1A1 } /* Name.Label */
57		.highlight .nn { color: #93A1A1 } /* Name.Namespace */
58		.highlight .nx { color: #93A1A1 } /* Name.Other */
59		.highlight .py { color: #93A1A1 } /* Name.Property */
60		.highlight .nt { color: #268BD2 } /* Name.Tag */
61		.highlight .nv { color: #268BD2 } /* Name.Variable */
62		.highlight .ow { color: #859900 } /* Operator.Word */
63		.highlight .w { color: #93A1A1 } /* Text.Whitespace */
64		.highlight .mf { color: #2AA198 } /* Literal.Number.Float */
65		.highlight .mh { color: #2AA198 } /* Literal.Number.Hex */
66		.highlight .mi { color: #2AA198 } /* Literal.Number.Integer */
67		.highlight .mo { color: #2AA198 } /* Literal.Number.Oct */
68		.highlight .sb { color: #586E75 } /* Literal.String.Backtick */
69		.highlight .sc { color: #2AA198 } /* Literal.String.Char */
70		.highlight .sd { color: #93A1A1 } /* Literal.String.Doc */
71		.highlight .s2 { color: #2AA198 } /* Literal.String.Double */
72		.highlight .se { color: #CB4B16 } /* Literal.String.Escape */
73		.highlight .sh { color: #93A1A1 } /* Literal.String.Heredoc */
74		.highlight .si { color: #2AA198 } /* Literal.String.Interpol */
75		.highlight .sx { color: #2AA198 } /* Literal.String.Other */
76		.highlight .sr { color: #DC322F } /* Literal.String.Regex */
77		.highlight .s1 { color: #2AA198 } /* Literal.String.Single */
78		.highlight .ss { color: #2AA198 } /* Literal.String.Symbol */
79		.highlight .bp { color: #268BD2 } /* Name.Builtin.Pseudo */
80		.highlight .vc { color: #268BD2 } /* Name.Variable.Class */
81		.highlight .vg { color: #268BD2 } /* Name.Variable.Global */
82		.highlight .vi { color: #268BD2 } /* Name.Variable.Instance */
83		.highlight .il { color: #2AA198 } /* Literal.Number.Integer.Long */

~~docs/source/solar/static/subtle_dots.png~~ less more

Binary diff not shown

-4

~~docs/source/solar/theme.conf~~ less more

0		[theme]
1		inherit = basic
2		stylesheet = solar.css
3		pygments_style = none

docs/source/special_cases_everywhere.jpg less more

Binary diff not shown

+16

-2

natsort/__init__.py less more

6	6	)
7	7
8	8	# Local imports.
	9	import sys
	10
	11	from natsort.utils import chain_functions
	12	from natsort._version import __version__
	13
9	14	from natsort.natsort import (
10	15	natsort_key,
11	16	natsort_keygen,

23	28	as_utf8,
24	29	ns,
25	30	)
26		from natsort._version import __version__
	31
	32	if float(sys.version[:3]) < 3:
	33	from natsort.natsort import natcmp
27	34
28	35	__all__ = [
29	36	'natsort_key',
30	37	'natsort_keygen',
31	38	'natsorted',
32		'versorted'
	39	'versorted',
33	40	'humansorted',
34	41	'realsorted',
35	42	'index_natsorted',

38	45	'index_realsorted',
39	46	'order_by_index',
40	47	'decoder',
	48	'natcmp',
41	49	'as_ascii',
42	50	'as_utf8',
43	51	'ns',
	52	'chain_functions',
44	53	]
	54
	55	# Add the ns keys to this namespace for convenience.
	56	globals().update(
	57	dict((k, v) for k, v in vars(ns).items() if not k.startswith('_'))
	58	)

-11

natsort/__main__.py less more

10	10
11	11	# Local imports.
12	12	from natsort.natsort import natsorted, ns
13		from natsort.utils import _regex_and_num_function_chooser
	13	from natsort.utils import _regex_chooser
14	14	from natsort._version import __version__
15	15	from natsort.compat.py23 import py23_str
16	16

102	102	"""\
103	103	Verifies that the given range has a low lower than the high.
104	104	If the condition is not met, a ValueError is raised.
105		Otherwise, the values are returned, but as floats.
106		"""
107		low, high = float(low), float(high)
	105	Otherwise the input is returned as-is.
	106	"""
108	107	if low >= high:
109	108	raise ValueError('low >= high')
110	109	else:

116	115	Check that the low value of the filter is lower than the high.
117	116	If there is to be no filter, return 'None'.
118	117	If the condition is not met, a ValueError is raised.
119		Otherwise, the values are returned, but as floats.
	118	Otherwise, the values are returned as-is.
120	119	"""
121	120	# Quick return if no filter.
122	121	if not filt:

170	169	if do_filter or args.exclude:
171	170	inp_options = (ns.FLOAT * is_float |
172	171	ns.SIGNED * signed |
173		ns.NOEXP * (not args.exp),
174		'.'
	172	ns.NOEXP * (not args.exp)
175	173	)
176		regex, num_function = _regex_and_num_function_chooser[inp_options]
	174	regex = _regex_chooser[inp_options]
177	175	if args.filter is not None:
178	176	lows, highs = ([f[0] for f in args.filter],
179	177	[f[1] for f in args.filter])
180	178	entries = [entry for entry in entries
181	179	if keep_entry_range(entry, lows, highs,
182		num_function, regex)]
	180	float, regex)]
183	181	if args.reverse_filter is not None:
184	182	lows, highs = ([f[0] for f in args.reverse_filter],
185	183	[f[1] for f in args.reverse_filter])
186	184	entries = [entry for entry in entries
187	185	if not keep_entry_range(entry, lows, highs,
188		num_function, regex)]
	186	float, regex)]
189	187	if args.exclude:
190	188	exclude = set(args.exclude)
191	189	entries = [entry for entry in entries
192	190	if exclude_entry(entry, exclude,
193		num_function, regex)]
	191	float, regex)]
194	192
195	193	# Print off the sorted results
196	194	for entry in natsorted(entries, reverse=args.reverse, alg=alg):

-1

natsort/_version.py less more

5	5	absolute_import
6	6	)
7	7
8		__version__ = '4.0.3'
	8	__version__ = '5.3.3'

+48

-36

natsort/compat/fake_fastnumbers.py less more

11	11	)
12	12
13	13	# Std. lib imports.
14		import sys
15		import re
16	14	import unicodedata
17		float_re = re.compile(r'[-+]?(\d*\.?\d+(?:[eE][-+]?\d+)?\|inf(?:inity)?\|nan)$')
18		if sys.version[0] == '2':
19		int_re = re.compile(r'[-+]?\d+[lL]?$')
20		else:
21		int_re = re.compile(r'[-+]?\d+$')
	15	from natsort.unicode_numbers import decimal_chars
	16	from natsort.compat.py23 import PY_VERSION
	17	if PY_VERSION >= 3:
22	18	long = int
23		unicode = str
24	19
25	20
26		def fast_float(x, regex_matcher=float_re.match, uni=unicodedata.numeric):
27		"""Convert a string to a float quickly"""
28		if type(x) in (int, long, float):
29		return float(x)
30		elif regex_matcher(x):
31		return float(x)
32		elif type(x) == unicode and len(x) == 1 and uni(x, None) is not None:
33		return uni(x)
34		else:
35		return x
	21	NAN_INF = ['INF', 'INf', 'Inf', 'inF', 'iNF', 'InF', 'inf', 'iNf',
	22	'NAN', 'nan', 'NaN', 'nAn', 'naN', 'NAn', 'nAN', 'Nan']
	23	NAN_INF.extend(['+'+x[:2] for x in NAN_INF] + ['-'+x[:2] for x in NAN_INF])
	24	NAN_INF = frozenset(NAN_INF)
	25	ASCII_NUMS = '0123456789+-'
36	26
37	27
38		def fast_int(x, regex_matcher=int_re.match, uni=unicodedata.digit):
	28	def fast_float(x, key=lambda x: x, nan=None,
	29	uni=unicodedata.numeric, nan_inf=NAN_INF,
	30	_first_char=frozenset(decimal_chars + list(ASCII_NUMS + '.'))):
	31	"""\
	32	Convert a string to a float quickly, return input as-is if not possible.
	33	We don't need to accept all input that the real fast_float accepts because
	34	the input will be controlled by the splitting algorithm.
	35	"""
	36	if x[0] in _first_char or x.lstrip()[:3] in nan_inf:
	37	try:
	38	x = float(x)
	39	return nan if nan is not None and x != x else x
	40	except ValueError:
	41	try:
	42	return uni(x, key(x)) if len(x) == 1 else key(x)
	43	except TypeError: # pragma: no cover
	44	return key(x)
	45	else:
	46	try:
	47	return uni(x, key(x)) if len(x) == 1 else key(x)
	48	except TypeError: # pragma: no cover
	49	return key(x)
	50
	51
	52	def fast_int(x, key=lambda x: x, nan=None, uni=unicodedata.digit,
	53	_first_char=frozenset(decimal_chars + list(ASCII_NUMS))):
39	54	"""\
40	55	Convert a string to a int quickly, return input as-is if not possible.
	56	We don't need to accept all input that the real fast_int accepts because
	57	the input will be controlled by the splitting algorithm.
41	58	"""
42		if type(x) in (int, long, float):
43		return int(x)
44		elif regex_matcher(x):
45		return int(x.rstrip('Ll'))
46		elif type(x) == unicode and len(x) == 1 and uni(x, None) is not None:
47		return uni(x)
	59	if x[0] in _first_char:
	60	try:
	61	return long(x)
	62	except ValueError:
	63	try:
	64	return uni(x, key(x)) if len(x) == 1 else key(x)
	65	except TypeError: # pragma: no cover
	66	return key(x)
48	67	else:
49		return x
50
51
52		def isfloat(x, num_only=False):
53		"""Returns true if the input is a float, false otherwise."""
54		return type(x) == float
55
56
57		def isint(x, num_only=False):
58		"""Returns true if the input is an int, false otherwise."""
59		return type(x) in set([int, long])
	68	try:
	69	return uni(x, key(x)) if len(x) == 1 else key(x)
	70	except TypeError: # pragma: no cover
	71	return key(x)

-7

natsort/compat/fastnumbers.py less more

5	5	absolute_import
6	6	)
7	7
	8	from distutils.version import StrictVersion
	9
8	10	# If the user has fastnumbers installed, they will get great speed
9	11	# benefits. If not, we use the simulated functions that come with natsort.
10	12	try:
11	13	from fastnumbers import (
12	14	fast_float,
13	15	fast_int,
14		isint,
15		isfloat,
16	16	)
17	17	import fastnumbers
18		v = list(map(int, fastnumbers.__version__.split('.')))
19		if not (v[0] >= 0 and v[1] >= 5): # Require >= version 0.5.0.
20		raise ImportError
	18	# Require >= version 0.7.1.
	19	if StrictVersion(fastnumbers.__version__) < StrictVersion('0.7.1'):
	20	raise ImportError # pragma: no cover
21	21	except ImportError:
22	22	from natsort.compat.fake_fastnumbers import (
23	23	fast_float,
24	24	fast_int,
25		isint,
26		isfloat,
27	25	)

+112

-28

natsort/compat/locale.py less more

5	5	absolute_import
6	6	)
7	7
8		# Std. lib imports
	8	# Std. lib imports.
9	9	import sys
10	10
11	11	# Local imports.
12		from natsort.compat.py23 import PY_VERSION, cmp_to_key
	12	from natsort.compat.py23 import (
	13	PY_VERSION,
	14	cmp_to_key,
	15	py23_unichr,
	16	)
	17
	18	# This string should be sorted after any other byte string because
	19	# it contains the max unicode character repeated 20 times.
	20	# You would need some odd data to come after that.
	21	null_string = ''
	22	null_string_max = py23_unichr(sys.maxunicode) * 20
13	23
14	24	# Make the strxfrm function from strcoll on Python2
15	25	# It can be buggy (especially on BSD-based systems),
16		# so prefer PyICU if available.
	26	# so prefer icu if available.
17	27	try:
18		import PyICU
	28	import icu
19	29	from locale import getlocale
20	30
21		# If using PyICU, get the locale from the current global locale,
22		# then create a sort key from that
23		def get_pyicu_transform(l, _d={}):
24		if l not in _d:
25		if l == (None, None):
26		c = PyICU.Collator.createInstance(PyICU.Locale())
27		else:
28		loc = '.'.join(l)
29		c = PyICU.Collator.createInstance(PyICU.Locale(loc))
30		_d[l] = c.getSortKey
31		return _d[l]
32		use_pyicu = True
33		null_string = b''
	31	null_string_locale = b''
	32
	33	# This string should in theory be sorted after any other byte
	34	# string because it contains the max byte char repeated many times.
	35	# You would need some odd data to come after that.
	36	null_string_locale_max = b'x7f' * 50
34	37
35	38	def dumb_sort():
36	39	return False
	40
	41	# If using icu, get the locale from the current global locale,
	42	def get_icu_locale():
	43	try:
	44	return icu.Locale('.'.join(getlocale()))
	45	except TypeError: # pragma: no cover
	46	return icu.Locale()
	47
	48	def get_strxfrm():
	49	return icu.Collator.createInstance(get_icu_locale()).getSortKey
	50
	51	def get_thousands_sep():
	52	sep = icu.DecimalFormatSymbols.kGroupingSeparatorSymbol
	53	return icu.DecimalFormatSymbols(get_icu_locale()).getSymbol(sep)
	54
	55	def get_decimal_point():
	56	sep = icu.DecimalFormatSymbols.kDecimalSeparatorSymbol
	57	return icu.DecimalFormatSymbols(get_icu_locale()).getSymbol(sep)
	58
37	59	except ImportError:
38		if sys.version[0] == '2':
	60	import locale
	61	if PY_VERSION < 3:
39	62	from locale import strcoll
40		strxfrm = cmp_to_key(strcoll)
41		null_string = strxfrm('')
	63	sentinel = object()
	64
	65	def custom_strcoll(a, b, last=sentinel):
	66	"""strcoll that can handle a sentinel that is always last."""
	67	if a is last:
	68	return 0 if a is b else 1
	69	elif b is last: # a cannot also be sentinel b/c above logic
	70	return -1
	71	else: # neither are sentinel
	72	return strcoll(a, b)
	73
	74	strxfrm = cmp_to_key(custom_strcoll)
	75	null_string_locale = strxfrm('')
	76	null_string_locale_max = strxfrm(sentinel)
42	77	else:
43	78	from locale import strxfrm
44		null_string = ''
45		use_pyicu = False
	79	null_string_locale = ''
	80
	81	# This string should be sorted after any other byte string because
	82	# it contains the max unicode character repeated 20 times.
	83	# You would need some odd data to come after that.
	84	null_string_locale_max = py23_unichr(sys.maxunicode) * 20
46	85
47	86	# On some systems, locale is broken and does not sort in the expected
48	87	# order. We will try to detect this and compensate.
49	88	def dumb_sort():
50	89	return strxfrm('A') < strxfrm('a')
51	90
	91	def get_strxfrm():
	92	return strxfrm
52	93
53		if PY_VERSION >= 3.3:
54		def _low(x):
55		return x.casefold()
56		else:
57		def _low(x):
58		return x.lower()
	94	def get_thousands_sep():
	95	sep = locale.localeconv()['thousands_sep']
	96	# If this locale library is broken, some of the thousands separator
	97	# characters are incorrectly blank. Here is a lookup table of the
	98	# corrections I am aware of.
	99	if dumb_sort():
	100	try:
	101	loc = '.'.join(locale.getlocale())
	102	except TypeError: # No locale loaded, default to ','
	103	return ','
	104	return {'de_DE.ISO8859-15': '.',
	105	'es_ES.ISO8859-1': '.',
	106	'de_AT.ISO8859-1': '.',
	107	'de_at': '\xa0',
	108	'nl_NL.UTF-8': '.',
	109	'es_es': '.',
	110	'fr_CH.ISO8859-15': '\xa0',
	111	'fr_CA.ISO8859-1': '\xa0',
	112	'de_CH.ISO8859-1': '.',
	113	'fr_FR.ISO8859-15': '\xa0',
	114	'nl_NL.ISO8859-1': '.',
	115	'ca_ES.UTF-8': '.',
	116	'nl_NL.ISO8859-15': '.',
	117	'de_ch': "'",
	118	'ca_es': '.',
	119	'de_AT.ISO8859-15': '.',
	120	'ca_ES.ISO8859-1': '.',
	121	'de_AT.UTF-8': '.',
	122	'es_ES.UTF-8': '.',
	123	'fr_fr': '\xa0',
	124	'es_ES.ISO8859-15': '.',
	125	'de_DE.ISO8859-1': '.',
	126	'nl_nl': '.',
	127	'fr_ch': '\xa0',
	128	'fr_ca': '\xa0',
	129	'de_DE.UTF-8': '.',
	130	'ca_ES.ISO8859-15': '.',
	131	'de_CH.ISO8859-15': '.',
	132	'fr_FR.ISO8859-1': '\xa0',
	133	'fr_CH.ISO8859-1': '\xa0',
	134	'de_de': '.',
	135	'fr_FR.UTF-8': '\xa0',
	136	'fr_CA.ISO8859-15': '\xa0',
	137	}.get(loc, sep)
	138	else:
	139	return sep
	140
	141	def get_decimal_point():
	142	return locale.localeconv()['decimal_point']

+14

-1

natsort/compat/py23.py less more

15	15
16	16	# Numeric form of version
17	17	PY_VERSION = float(sys.version[:3])
	18	NEWPY = PY_VERSION >= 3.3
18	19
19	20	# Assume all strings are Unicode in Python 2
20	21	py23_str = str if sys.version[0] == '3' else unicode

28	29	# unichr function
29	30	py23_unichr = chr if sys.version[0] == '3' else unichr
30	31
	32
	33	def _py23_cmp(a, b):
	34	return (a > b) - (a < b)
	35
	36
	37	py23_cmp = _py23_cmp if sys.version[0] == '3' else cmp
	38
31	39	# zip as an iterator
32	40	if sys.version[0] == '3':
33	41	py23_zip = zip
	42	py23_map = map
	43	py23_filter = filter
34	44	else:
35	45	import itertools
36	46	py23_zip = itertools.izip
	47	py23_map = itertools.imap
	48	py23_filter = itertools.ifilter
37	49
38	50
39	51	# cmp_to_key was not created till 2.7, so require this for 2.6

84	96	func = func_or_str
85	97	doc = func.__doc__
86	98
87		doc = str_change_func(doc)
	99	if doc is not None:
	100	doc = str_change_func(doc)
88	101
89	102	if func:
90	103	func.__doc__ = doc

-74

~~natsort/locale_help.py~~ less more

0		# -- coding: utf-8 --
1		"""\
2		This module is intended to help combine some locale functions
3		together for natsort consumption. It also accounts for Python2
4		and Python3 differences.
5		"""
6		from __future__ import (
7		print_function,
8		division,
9		unicode_literals,
10		absolute_import
11		)
12
13		# Std. lib imports.
14		from itertools import chain
15		from locale import localeconv
16
17		# Local imports.
18		from natsort.compat.locale import use_pyicu, _low
19		if use_pyicu:
20		from natsort.compat.locale import get_pyicu_transform, getlocale
21		else:
22		from natsort.compat.locale import strxfrm
23
24
25		def groupletters(x):
26		"""Double all characters, making doubled letters lowercase."""
27		return ''.join(chain.from_iterable([_low(y), y] for y in x))
28
29
30		def grouper(val, func):
31		"""\
32		Attempt to convert a string to a number. If the conversion
33		was not possible, run it through the letter grouper
34		to make the sorting work as requested.
35		"""
36		# Return the number or transformed string.
37		# If the input is identical to the output, then no conversion happened.
38		s = func[0](val)
39		return groupletters(s) if not func[1](s) else s
40
41
42		def locale_convert(val, func, group):
43		"""\
44		Attempt to convert a string to a number, first converting
45		the decimal place character if needed. Then, if the conversion
46		was not possible (i.e. it is not a number), run it through
47		strxfrm to make the work sorting as requested, possibly grouping first.
48		"""
49
50		# Format the number so that the conversion function can interpret it.
51		radix = localeconv()['decimal_point']
52		s = val.replace(radix, '.') if radix != '.' else val
53
54		# Perform the conversion
55		t = func[0](s)
56
57		# Return the number or transformed string.
58		# If the input is identical to the output, then no conversion happened.
59		# In this case, we don't want to return the function output because it
60		# may have had characters modified from the above 'replace' call,
61		# so we return the input.
62		if group:
63		if use_pyicu:
64		xfrm = get_pyicu_transform(getlocale())
65		return xfrm(groupletters(val)) if not func[1](t) else t
66		else:
67		return strxfrm(groupletters(val)) if not func[1](t) else t
68		else:
69		if use_pyicu:
70		xfrm = get_pyicu_transform(getlocale())
71		return xfrm(val) if not func[1](t) else t
72		else:
73		return strxfrm(val) if not func[1](t) else t

+213

-168

natsort/natsort.py less more

17	17	)
18	18
19	19	# Std lib. imports.
20		import re
21	20	from operator import itemgetter
22	21	from functools import partial
23	22	from warnings import warn
24	23
25	24	# Local imports.
	25	import sys
	26
	27	import natsort.compat.locale
26	28	from natsort.ns_enum import ns
27		from natsort.compat.py23 import u_format
	29	from natsort.compat.py23 import (
	30	u_format,
	31	py23_str,
	32	py23_cmp)
28	33	from natsort.utils import (
29	34	_natsort_key,
30	35	_args_to_enum,
31	36	_do_decoding,
	37	_regex_chooser,
	38	_parse_string_factory,
	39	_parse_path_factory,
	40	_parse_number_factory,
	41	_parse_bytes_factory,
	42	_input_string_transform_factory,
	43	_string_component_transform_factory,
	44	_final_data_transform_factory,
32	45	)
33	46
34	47	# Make sure the doctest works for either python2 or python3

66	79	True
67	80	>>> f(12345) == 12345
68	81	True
	82	>>> # On Python 3, without decoder this would return [b'a10', b'a2']
69	83	>>> natsorted([b'a10', b'a2'], key=decoder('utf8')) == [b'a2', b'a10']
70	84	True
71		>>> # On Python 3, without decoder this would return [b'a10', b'a2']
	85	>>> # On Python 3, without decoder this would raise a TypeError.
72	86	>>> natsorted([b'a10', 'a2'], key=decoder('utf8')) == ['a2', b'a10']
73	87	True
74		>>> # On Python 3, without decoder this would raise a TypeError.
75	88
76	89	"""
77	90	return partial(_do_decoding, encoding=encoding)

131	144	"""Undocumented, kept for backwards-compatibility."""
132	145	msg = "natsort_key is deprecated as of 3.4.0, please use natsort_keygen"
133	146	warn(msg, DeprecationWarning)
134		return _natsort_key(val, key, _args_to_enum(**_kwargs) \| alg)
	147	return natsort_keygen(key, alg, **_kwargs)(val)
135	148
136	149
137	150	@u_format

145	158
146	159	The user may customize the generated function with the
147	160	arguments to `natsort_keygen`, including an optional
148		`key` function which will be called before the `natsort_key`.
	161	`key` function.
149	162
150	163	Parameters
151	164	----------

162	175	Returns
163	176	-------
164	177	out : function
165		A wrapped version of the `natsort_key` function that is
	178	A function that parses input for natural sorting that is
166	179	suitable for passing as the `key` argument to functions
167	180	such as `sorted`.
168	181

173	186	Examples
174	187	--------
175	188	`natsort_keygen` is a convenient way to create a custom key
176		to sort lists in-place (for example). Calling with no objects
177		will return a plain `natsort_key` instance::
	189	to sort lists in-place (for example)::
178	190
179	191	>>> a = ['num5.10', 'num-3', 'num5.3', 'num2']
180	192	>>> a.sort(key=natsort_keygen(alg=ns.REAL))

182	194	[{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3']
183	195
184	196	"""
185		return partial(_natsort_key, key=key, alg=_args_to_enum(**_kwargs) \| alg)
	197	# Transform old arguments to the ns enum.
	198	try:
	199	alg = _args_to_enum(**_kwargs) | alg
	200	except TypeError:
	201	msg = "natsort_keygen: 'alg' argument must be from the enum 'ns'"
	202	raise ValueError(msg+', got {0}'.format(py23_str(alg)))
	203
	204	# Add the _DUMB option if the locale library is broken.
	205	if alg & ns.LOCALEALPHA and natsort.compat.locale.dumb_sort():
	206	alg |= ns._DUMB
	207
	208	# Set some variables that will be passed to the factory functions
	209	if alg & ns.NUMAFTER:
	210	if alg & ns.LOCALEALPHA:
	211	sep = natsort.compat.locale.null_string_locale_max
	212	else:
	213	sep = natsort.compat.locale.null_string_max
	214	pre_sep = natsort.compat.locale.null_string_max
	215	else:
	216	if alg & ns.LOCALEALPHA:
	217	sep = natsort.compat.locale.null_string_locale
	218	else:
	219	sep = natsort.compat.locale.null_string
	220	pre_sep = natsort.compat.locale.null_string
	221	regex = _regex_chooser[alg & ns._NUMERIC_ONLY]
	222
	223	# Create the functions that will be used to split strings.
	224	input_transform = _input_string_transform_factory(alg)
	225	component_transform = _string_component_transform_factory(alg)
	226	final_transform = _final_data_transform_factory(alg, sep, pre_sep)
	227
	228	# Create the high-level parsing functions for strings, bytes, and numbers.
	229	string_func = _parse_string_factory(
	230	alg, sep, regex.split,
	231	input_transform, component_transform, final_transform
	232	)
	233	if alg & ns.PATH:
	234	string_func = _parse_path_factory(string_func)
	235	bytes_func = _parse_bytes_factory(alg)
	236	num_func = _parse_number_factory(alg, sep, pre_sep)
	237
	238	# Return the natsort key with the parsing path pre-chosen.
	239	return partial(
	240	_natsort_key,
	241	key=key,
	242	string_func=string_func,
	243	bytes_func=bytes_func,
	244	num_func=num_func
	245	)
186	246
187	247
188	248	@u_format
189	249	def natsorted(seq, key=None, reverse=False, alg=0, **_kwargs):
190	250	"""\
191		Sorts a sequence naturally.
192
193		Sorts a sequence naturally (alphabetically and numerically),
194		not lexicographically. Returns a new copy of the sorted
195		sequence as a list.
	251	Sorts an iterable naturally.
	252
	253	Sorts an iterable naturally (alphabetically and numerically),
	254	not lexicographically. Returns a list containing a sorted copy
	255	of the iterable.
	256
	257	Parameters
	258	----------
	259	seq : iterable
	260	The iterable to sort.
	261
	262	key : callable, optional
	263	A key used to determine how to sort each element of the iterable.
	264	It is not applied recursively.
	265	It should accept a single argument and return a single value.
	266
	267	reverse : {{True, False}}, optional
	268	Return the list in reversed sorted order. The default is
	269	`False`.
	270
	271	alg : ns enum, optional
	272	This option is used to control which algorithm `natsort`
	273	uses when sorting. For details into these options, please see
	274	the :class:`ns` class documentation. The default is `ns.INT`.
	275
	276	Returns
	277	-------
	278	out: list
	279	The sorted sequence.
	280
	281	See Also
	282	--------
	283	natsort_keygen : Generates the key that makes natural sorting possible.
	284	realsorted : A wrapper for ``natsorted(seq, alg=ns.REAL)``.
	285	humansorted : A wrapper for ``natsorted(seq, alg=ns.LOCALE)``.
	286	index_natsorted : Returns the sorted indexes from `natsorted`.
	287
	288	Examples
	289	--------
	290	Use `natsorted` just like the builtin `sorted`::
	291
	292	>>> a = ['num3', 'num5', 'num2']
	293	>>> natsorted(a)
	294	[{u}'num2', {u}'num3', {u}'num5']
	295
	296	"""
	297	natsort_key = natsort_keygen(key, alg, **_kwargs)
	298	return sorted(seq, reverse=reverse, key=natsort_key)
	299
	300
	301	@u_format
	302	def versorted(seq, key=None, reverse=False, alg=0, **_kwargs):
	303	"""\
	304	Identical to :func:`natsorted`.
	305
	306	This function exists for backwards compatibility with `natsort`
	307	version < 4.0.0. Future development should use :func:`natsorted`.
	308
	309	See Also
	310	--------
	311	natsorted
	312
	313	"""
	314	return natsorted(seq, key, reverse, alg, **_kwargs)
	315
	316
	317	@u_format
	318	def humansorted(seq, key=None, reverse=False, alg=0):
	319	"""\
	320	Convenience function to properly sort non-numeric characters.
	321
	322	Convenience function to properly sort non-numeric characters
	323	in a locale-aware fashion (a.k.a "human sorting"). This is a
	324	wrapper around ``natsorted(seq, alg=ns.LOCALE)``.
196	325
197	326	Parameters
198	327	----------

211	340	alg : ns enum, optional
212	341	This option is used to control which algorithm `natsort`
213	342	uses when sorting. For details into these options, please see
214		the :class:`ns` class documentation. The default is `ns.INT`.
215
216		Returns
217		-------
218		out: list
219		The sorted sequence.
220
221		See Also
222		--------
223		natsort_keygen : Generates the key that makes natural sorting possible.
224		realsorted : A wrapper for ``natsorted(seq, alg=ns.REAL)``.
225		humansorted : A wrapper for ``natsorted(seq, alg=ns.LOCALE)``.
226		index_natsorted : Returns the sorted indexes from `natsorted`.
227
228		Examples
229		--------
230		Use `natsorted` just like the builtin `sorted`::
231
232		>>> a = ['num3', 'num5', 'num2']
233		>>> natsorted(a)
234		[{u}'num2', {u}'num3', {u}'num5']
235
236		"""
237		alg = _args_to_enum(**_kwargs) \| alg
238		try:
239		return sorted(seq, reverse=reverse, key=natsort_keygen(key, alg=alg))
240		except TypeError as e: # pragma: no cover
241		# In the event of an unresolved "unorderable types" error
242		# for string to number type comparisons (not str/bytes),
243		# attempt to sort again, being careful to prevent this error.
244		r = re.compile(r'(?:str\|bytes) [<>] (?:str\|bytes)')
245		if 'unorderable types' in str(e) and not r.search(str(e)):
246		return sorted(seq, reverse=reverse,
247		key=natsort_keygen(key,
248		alg=alg \| ns.TYPESAFE))
249		else:
250		# Re-raise if the problem was not "unorderable types"
251		raise
252
253
254		@u_format
255		def versorted(seq, key=None, reverse=False, alg=0, **_kwargs):
256		"""\
257		Identical to :func:`natsorted`.
258
259		This function exists for backwards compatibility with `natsort`
260		version < 4.0.0. Future development should use :func:`natsorted`.
261
262		Please see the :func:`natsorted` documentation for use.
263
264		See Also
265		--------
266		natsorted
267
268		"""
269		return natsorted(seq, key, reverse, alg, **_kwargs)
270
271
272		@u_format
273		def humansorted(seq, key=None, reverse=False, alg=0):
274		"""\
275		Convenience function to properly sort non-numeric characters.
276
277		Convenience function to properly sort non-numeric characters
278		in a locale-aware fashion (a.k.a "human sorting"). This is a
279		wrapper around ``natsorted(seq, alg=ns.LOCALE)``.
280
281		.. warning:: On BSD-based systems (like Mac OS X), the underlying
282		C library that Python's locale module uses is broken.
283		On these systems it is recommended that you install
284		`PyICU <https://pypi.python.org/pypi/PyICU>`_
285		if you wish to use ``humansorted``, especially if you need
286		to handle non-ASCII characters. If you are on
287		one of systems and get unexpected results, please try
288		using `PyICU <https://pypi.python.org/pypi/PyICU>`_
289		before filing a bug report to `natsort`.
290
291		Parameters
292		----------
293		seq : iterable
294		The sequence to sort.
295
296		key : callable, optional
297		A key used to determine how to sort each element of the sequence.
298		It is not applied recursively.
299		It should accept a single argument and return a single value.
300
301		reverse : {{True, False}}, optional
302		Return the list in reversed sorted order. The default is
303		`False`.
304
305		alg : ns enum, optional
306		This option is used to control which algorithm `natsort`
307		uses when sorting. For details into these options, please see
308	343	the :class:`ns` class documentation. The default is `ns.LOCALE`.
309	344
310	345	Returns

318	353
319	354	Notes
320	355	-----
321		You may find that if you do not explicitly set
322		the locale your results may not be as you expect, although
323		as of ``natsort`` version 4.0.0 the sorting algorithm has been
324		updated to account for a buggy ``locale`` installation.
325		In the below example 'en_US.UTF-8' is used, but you should use your
326		locale::
327
328		>>> import locale
329		>>> # The 'str' call is only to get around a bug on Python 2.x
330		>>> # where 'setlocale' does not expect unicode strings (ironic,
331		>>> # right?)
332		>>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8'))
333		'en_US.UTF-8'
334
335		It is preferred that you do this before importing `natsort`.
336		If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning
337		above) then you should not need to do explicitly set a locale.
	356	Please read :ref:`locale_issues` before using `humansorted`.
338	357
339	358	Examples
340	359	--------

462	481	[{u}'baz', {u}'foo', {u}'bar']
463	482
464	483	"""
465		alg = _args_to_enum(**_kwargs) \| alg
466	484	if key is None:
467	485	newkey = itemgetter(1)
468	486	else:

470	488	return key(itemgetter(1)(x))
471	489	# Pair the index and sequence together, then sort by element
472	490	index_seq_pair = [[x, y] for x, y in enumerate(seq)]
473		try:
474		index_seq_pair.sort(reverse=reverse,
475		key=natsort_keygen(newkey, alg=alg))
476		except TypeError as e: # pragma: no cover
477		# In the event of an unresolved "unorderable types" error
478		# attempt to sort again, being careful to prevent this error.
479		if 'unorderable types' in str(e):
480		index_seq_pair.sort(reverse=reverse,
481		key=natsort_keygen(newkey,
482		alg=alg \| ns.TYPESAFE))
483		else:
484		# Re-raise if the problem was not "unorderable types"
485		raise
	491	index_seq_pair.sort(reverse=reverse,
	492	key=natsort_keygen(newkey, alg, **_kwargs))
486	493	return [x for x, _ in index_seq_pair]
487	494
488	495

517	524	of the given sequence.
518	525
519	526	This is a wrapper around ``index_natsorted(seq, alg=ns.LOCALE)``.
520		Please see the ``humansorted`` documentation for caveats of
521		using ``index_humansorted``.
522	527
523	528	Parameters
524	529	----------

551	556
552	557	Notes
553	558	-----
554		You may find that if you do not explicitly set
555		the locale your results may not be as you expect, although
556		as of ``natsort`` version 4.0.0 the sorting algorithm has been
557		updated to account for a buggy ``locale`` installation.
558		In the below example 'en_US.UTF-8' is used, but you should use your
559		locale::
560
561		>>> import locale
562		>>> # The 'str' call is only to get around a bug on Python 2.x
563		>>> # where 'setlocale' does not expect unicode strings (ironic,
564		>>> # right?)
565		>>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8'))
566		'en_US.UTF-8'
567
568		It is preferred that you do this before importing `natsort`.
569		If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning
570		above) then you should not need to explicitly set a locale.
	559	Please read :ref:`locale_issues` before using `humansorted`.
571	560
572	561	Examples
573	562	--------

644	633	"""\
645	634	Order a given sequence by an index sequence.
646	635
647		The output of `index_natsorted` and `index_versorted` is a
	636	The output of `index_natsorted` is a
648	637	sequence of integers (index) that correspond to how its input
649	638	sequence would be sorted. The idea is that this index can
650	639	be used to reorder multiple sequences by the sorted order of the

653	642
654	643	Parameters
655	644	----------
656		seq : iterable
	645	seq : sequence
657	646	The sequence to order.
658	647
659	648	index : iterable
660		The sequence that indicates how to order `seq`.
	649	The iterable that indicates how to order `seq`.
661	650	It should be the same length as `seq` and consist
662	651	of integers only.
663	652
664	653	iter : {{True, False}}, optional
665	654	If `True`, the ordered sequence is returned as a
666		generator expression; otherwise it is returned as a
	655	iterator; otherwise it is returned as a
667	656	list. The default is `False`.
668	657
669	658	Returns
670	659	-------
671		out : {{list, generator}}
672		The sequence ordered by `index`, as a `list` or as a
673		generator expression (depending on the value of `iter`).
	660	out : {{list, iterator}}
	661	The sequence ordered by `index`, as a `list` or as an
	662	iterator (depending on the value of `iter`).
674	663
675	664	See Also
676	665	--------
677	666	index_natsorted
678		index_versorted
679	667	index_humansorted
680	668	index_realsorted
681	669

683	671	--------
684	672
685	673	`order_by_index` is a convenience function that helps you apply
686		the result of `index_natsorted` or `index_versorted`::
	674	the result of `index_natsorted`::
687	675
688	676	>>> a = ['num3', 'num5', 'num2']
689	677	>>> b = ['foo', 'bar', 'baz']

698	686
699	687	"""
700	688	return (seq[i] for i in index) if iter else [seq[i] for i in index]
	689
	690
	691	if float(sys.version[:3]) < 3:
	692	# pylint: disable=unused-variable
	693	class natcmp(object):
	694	"""
	695	Compare two objects using a key and an algorithm.
	696
	697	Parameters
	698	----------
	699	x : object
	700	First object to compare.
	701
	702	y : object
	703	Second object to compare.
	704
	705	alg : ns enum, optional
	706	This option is used to control which algorithm `natsort`
	707	uses when sorting. For details into these options, please see
	708	the :class:`ns` class documentation. The default is `ns.INT`.
	709
	710	Returns
	711	-------
	712	out: int
	713	0 if x and y are equal, 1 if x > y, -1 if y > x.
	714
	715	See Also
	716	--------
	717	natsort_keygen : Generates a key that makes natural sorting possible.
	718
	719	Examples
	720	--------
	721	Use `natcmp` just like the builtin `cmp`::
	722
	723	>>> one = 1
	724	>>> two = 2
	725	>>> natcmp(one, two)
	726	-1
	727	"""
	728	cached_keys = {}
	729
	730	def __new__(cls, x, y, alg=0, *args, **kwargs):
	731	try:
	732	alg = _args_to_enum(**kwargs) | alg
	733	except TypeError:
	734	msg = ("natsort_keygen: 'alg' argument must be "
	735	"from the enum 'ns'")
	736	raise ValueError(msg + ', got {0}'.format(py23_str(alg)))
	737
	738	# Add the _DUMB option if the locale library is broken.
	739	if alg & ns.LOCALEALPHA and natsort.compat.locale.dumb_sort():
	740	alg |= ns._DUMB
	741
	742	if alg not in cls.cached_keys:
	743	cls.cached_keys[alg] = natsort_keygen(alg=alg)
	744
	745	return py23_cmp(cls.cached_keys[alg](x), cls.cached_keys[alg](y))

+73

-77

natsort/ns_enum.py less more

14	14	This class acts like an enum to control the `natsort` algorithm. The
15	15	user may select several options simultaneously by or'ing the options
16	16	together. For example, to choose ``ns.INT``, ``ns.PATH``, and
17		``ns.LOCALE``, you could do ``ns.INT | ns.LOCALE | ns.PATH``.
	17	``ns.LOCALE``, you could do ``ns.INT | ns.LOCALE | ns.PATH``. Each
	18	function in the :mod:`natsort` package has an `alg` option that accepts
	19	this enum to allow fine control over how your input is sorted.
18	20
19	21	Each option has a shortened 1- or 2-letter form.
20	22
21		.. warning:: On BSD-based systems (like Mac OS X), the underlying
22		C library that Python's locale module uses is broken.
23		On these systems it is recommended that you install
24		`PyICU <https://pypi.python.org/pypi/PyICU>`_
25		if you wish to use ``LOCALE``, especially if you need
26		to handle non-ASCII characters. If you are on one of
27		these systems and get unexpected results, please try using
28		`PyICU <https://pypi.python.org/pypi/PyICU>`_ before
29		filing a bug report to ``natsort``.
	23	.. note:: Please read :ref:`locale_issues` before using ``ns.LOCALE``.
30	24
31	25	Attributes
32	26	----------

36	30	Tell `natsort` to parse numbers as floats.
37	31	UNSIGNED, U (default)
38	32	Tell `natsort` to ignore any sign (i.e. "-" or "+") to the immediate
39		left of a number. It is the same as setting the old `signed` option
40		to `False`. This is the default.
	33	left of a number. This is the default.
41	34	SIGNED, S
42	35	Tell `natsort` to take into account any sign (i.e. "-" or "+")
43		to the immediate left of a number. It is the same as setting
44		the old `signed` option to `True`.
45		VERSION, V
46		This is a shortcut for ``ns.INT | ns.UNSIGNED``, which is useful
47		when attempting to sort version numbers. It is the same as
48		setting the old `number_type` option to `None`. Since
49		``ns.INT | ns.UNSIGNED`` is default, this is
50		unnecessary.
51		DIGIT, D
52		Same as `VERSION` above.
	36	to the immediate left of a number.
53	37	REAL, R
54	38	This is a shortcut for ``ns.FLOAT | ns.SIGNED``, which is useful
55	39	when attempting to sort real numbers.
56	40	NOEXP, N
57		Tell `natsort` to not search for exponents as part of the number.
	41	Tell `natsort` to not search for exponents as part of a float number.
58	42	For example, with `NOEXP` the number "5.6E5" would be interpreted
59		as `5.6`, `"E"`, and `5`. It is the same as setting the old
60		`exp` option to `False`.
	43	as `5.6`, `"E"`, and `5` instead of `560000`.
	44	NUMAFTER, NA
	45	Tell `natsort` to sort numbers after non-numbers. By default
	46	numbers will be ordered before non-numbers.
61	47	PATH, P
62	48	Tell `natsort` to interpret strings as filesystem paths, so they
63	49	will be split according to the filesystem separator

67	53	sorted properly; 'Folder/' will be placed at the end, not at the
68	54	front. It is the same as setting the old `as_path` option to
69	55	`True`.
	56	COMPATIBILITYNORMALIZE, CN
	57	Use the "NFKD" unicode normalization form on input rather than the
	58	default "NFD". This will transform characters such as '⑦' into
	59	'7'. Please see https://stackoverflow.com/a/7934397/1399279,
	60	https://stackoverflow.com/a/7931547/1399279,
	61	and http://unicode.org/reports/tr15/ for full details into unicode
	62	normalization.
70	63	LOCALE, L
71		Tell `natsort` to be locale-aware when sorting strings (everything
72		that was not converted to a number). Your sorting results will vary
73		depending on your current locale. Generally, the `GROUPLETTERS`
74		option is not needed with `LOCALE` because the `locale` library
75		groups the letters in the same manner (although you may still
76		need `GROUPLETTERS` if there are numbers in your strings).
	64	Tell `natsort` to be locale-aware when sorting. This includes both
	65	proper sorting of alphabetical characters as well as proper
	66	handling of locale-dependent decimal separators and thousands
	67	separators. This is a shortcut for
	68	``ns.LOCALEALPHA | ns.LOCALENUM``.
	69	Your sorting results will vary depending on your current locale.
	70	LOCALEALPHA, LA
	71	Tell `natsort` to be locale-aware when sorting, but only for
	72	alphabetical characters.
	73	LOCALENUM, LN
	74	Tell `natsort` to be locale-aware when sorting, but only for
	75	decimal separators and thousands separators.
77	76	IGNORECASE, IC
78	77	Tell `natsort` to ignore case when sorting. For example,
79	78	``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as

96	95	``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as
97	96	``['Apple', 'apple', 'Banana', 'banana']``.
98	97	Useless when used with `IGNORECASE`; use with `LOWERCASEFIRST`
99		to reverse the order of upper and lower case.
	98	to reverse the order of upper and lower case. Generally not
	99	needed with `LOCALE`.
100	100	CAPITALFIRST, C
101	101	Only used when `LOCALE` is enabled. Tell `natsort` to put all
102	102	capitalized words before non-capitalized words. This is essentially

109	109	treat these as +Infinity and place them after all the other numbers.
110	110	By default, an NaN will be treated as -Infinity and be placed first.
111	111	TYPESAFE, T
112		Try hard to avoid "unorderable types" error on Python 3. It
113		is the same as setting the old `py3_safe` option to `True`.
114		This is only needed if using ``SIGNED`` or if sorting by
115		``FLOAT``. You shouldn't need to use this unless you are using
116		``natsort_keygen``. NOTE: It cannot resolve the ``TypeError``
117		from trying to compare `str` and `bytes`.
	112	Deprecated as of `natsort` version 5.0.0; this option is now
	113	a no-op because it is always true.
	114	VERSION, V
	115	Deprecated as of `natsort` version 5.0.0; this option is now
	116	a no-op because it is the default.
	117	DIGIT, D
	118	Same as `VERSION` above.
118	119
119	120	Notes
120	121	-----
121		If using `LOCALE`, you may find that if you do not explicitly set
122		the locale your results may not be as you expect... I have found that
123		it depends on the system you are on. To do this is straightforward
124		(in the below example I use 'en_US.UTF-8', but you should use your
125		locale)::
	122	If you prefer to use `import natsort as ns` as opposed to
	123	`from natsort import natsorted, ns`, the `ns` options are
	124	available as top-level imports.
126	125
127		>>> import locale
128		>>> # The 'str' call is only to get around a bug on Python 2.x
129		>>> # where 'setlocale' does not expect unicode strings (ironic,
130		>>> # right?)
131		>>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8'))
132		'en_US.UTF-8'
133
134		It is preferred that you do this before importing `natsort`.
135		If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning
136		above) then you should not need to do this.
	126	>>> import natsort as ns
	127	>>> a = ['num5.10', 'num-3', 'num5.3', 'num2']
	128	>>> ns.natsorted(a, alg=ns.REAL) == ns.natsorted(a, alg=ns.ns.REAL)
	129	True
137	130
138	131	"""
139		pass
	132	# The following were previously options but are now defaults.
	133	TYPESAFE = T = 0
	134	INT = I = 0
	135	VERSION = V = 0
	136	DIGIT = D = 0
	137	UNSIGNED = U = 0
140	138
	139	# The below are options. The values are stored as powers of two
	140	# so bitmasks can be used to extract the user's requested options.
	141	FLOAT = F = 1 << 0
	142	SIGNED = S = 1 << 1
	143	REAL = R = FLOAT | SIGNED
	144	NOEXP = N = 1 << 2
	145	PATH = P = 1 << 3
	146	LOCALEALPHA = LA = 1 << 4
	147	LOCALENUM = LN = 1 << 5
	148	LOCALE = L = LOCALEALPHA | LOCALENUM
	149	IGNORECASE = IC = 1 << 6
	150	LOWERCASEFIRST = LF = 1 << 7
	151	GROUPLETTERS = G = 1 << 8
	152	UNGROUPLETTERS = UG = 1 << 9
	153	CAPITALFIRST = C = UNGROUPLETTERS
	154	NANLAST = NL = 1 << 10
	155	COMPATIBILITYNORMALIZE = CN = 1 << 11
	156	NUMAFTER = NA = 1 << 12
141	157
142		# Sort algorithm "enum" values.
143		_ns = {
144		'INT': 0, 'I': 0,
145		'FLOAT': 1, 'F': 1,
146		'UNSIGNED': 0, 'U': 0,
147		'SIGNED': 2, 'S': 2,
148		'VERSION': 0, 'V': 0, # Shortcut for INT | UNSIGNED
149		'DIGIT': 0, 'D': 0, # Shortcut for INT | UNSIGNED
150		'REAL': 3, 'R': 3, # Shortcut for FLOAT | SIGNED
151		'NOEXP': 4, 'N': 4,
152		'PATH': 8, 'P': 8,
153		'LOCALE': 16, 'L': 16,
154		'IGNORECASE': 32, 'IC': 32,
155		'LOWERCASEFIRST': 64, 'LF': 64,
156		'GROUPLETTERS': 128, 'G': 128,
157		'UNGROUPLETTERS': 256, 'UG': 256,
158		'CAPITALFIRST': 256, 'C': 256,
159		'NANLAST': 512, 'NL': 512,
160		'TYPESAFE': 2048, 'T': 2048,
161		}
162		# Populate the ns class with the _ns values.
163		for x, y in _ns.items():
164		setattr(ns, x, y)
	158	# The below are private options for internal use only.
	159	_NUMERIC_ONLY = REAL | NOEXP
	160	_DUMB = 1 << 31

+297

-144

natsort/unicode_numbers.py less more

18	18	# Rather than determine this on the fly, which would incur a startup
19	19	# runtime penalty, the hex values of the Unicode numeric characters
20	20	# are hard-coded below.
21		numeric_hex = [
22		0XB2, 0XB3, 0XB9, 0XBC, 0XBD, 0XBE, 0X660, 0X661, 0X662, 0X663, 0X664,
23		0X665, 0X666, 0X667, 0X668, 0X669, 0X6F0, 0X6F1, 0X6F2, 0X6F3, 0X6F4,
24		0X6F5, 0X6F6, 0X6F7, 0X6F8, 0X6F9, 0X7C0, 0X7C1, 0X7C2, 0X7C3, 0X7C4,
25		0X7C5, 0X7C6, 0X7C7, 0X7C8, 0X7C9, 0X966, 0X967, 0X968, 0X969, 0X96A,
26		0X96B, 0X96C, 0X96D, 0X96E, 0X96F, 0X9E6, 0X9E7, 0X9E8, 0X9E9, 0X9EA,
27		0X9EB, 0X9EC, 0X9ED, 0X9EE, 0X9EF, 0X9F4, 0X9F5, 0X9F6, 0X9F7, 0X9F8,
28		0X9F9, 0XA66, 0XA67, 0XA68, 0XA69, 0XA6A, 0XA6B, 0XA6C, 0XA6D, 0XA6E,
29		0XA6F, 0XAE6, 0XAE7, 0XAE8, 0XAE9, 0XAEA, 0XAEB, 0XAEC, 0XAED, 0XAEE,
30		0XAEF, 0XB66, 0XB67, 0XB68, 0XB69, 0XB6A, 0XB6B, 0XB6C, 0XB6D, 0XB6E,
31		0XB6F, 0XB72, 0XB73, 0XB74, 0XB75, 0XB76, 0XB77, 0XBE6, 0XBE7, 0XBE8,
32		0XBE9, 0XBEA, 0XBEB, 0XBEC, 0XBED, 0XBEE, 0XBEF, 0XBF0, 0XBF1, 0XBF2,
33		0XC66, 0XC67, 0XC68, 0XC69, 0XC6A, 0XC6B, 0XC6C, 0XC6D, 0XC6E, 0XC6F,
34		0XC78, 0XC79, 0XC7A, 0XC7B, 0XC7C, 0XC7D, 0XC7E, 0XCE6, 0XCE7, 0XCE8,
35		0XCE9, 0XCEA, 0XCEB, 0XCEC, 0XCED, 0XCEE, 0XCEF, 0XD66, 0XD67, 0XD68,
36		0XD69, 0XD6A, 0XD6B, 0XD6C, 0XD6D, 0XD6E, 0XD6F, 0XD70, 0XD71, 0XD72,
37		0XD73, 0XD74, 0XD75, 0XE50, 0XE51, 0XE52, 0XE53, 0XE54, 0XE55, 0XE56,
38		0XE57, 0XE58, 0XE59, 0XED0, 0XED1, 0XED2, 0XED3, 0XED4, 0XED5, 0XED6,
39		0XED7, 0XED8, 0XED9, 0XF20, 0XF21, 0XF22, 0XF23, 0XF24, 0XF25, 0XF26,
40		0XF27, 0XF28, 0XF29, 0XF2A, 0XF2B, 0XF2C, 0XF2D, 0XF2E, 0XF2F, 0XF30,
41		0XF31, 0XF32, 0XF33, 0X1040, 0X1041, 0X1042, 0X1043, 0X1044, 0X1045,
42		0X1046, 0X1047, 0X1048, 0X1049, 0X1090, 0X1091, 0X1092, 0X1093, 0X1094,
43		0X1095, 0X1096, 0X1097, 0X1098, 0X1099, 0X1369, 0X136A, 0X136B, 0X136C,
44		0X136D, 0X136E, 0X136F, 0X1370, 0X1371, 0X1372, 0X1373, 0X1374, 0X1375,
45		0X1376, 0X1377, 0X1378, 0X1379, 0X137A, 0X137B, 0X137C, 0X16EE, 0X16EF,
46		0X16F0, 0X17E0, 0X17E1, 0X17E2, 0X17E3, 0X17E4, 0X17E5, 0X17E6, 0X17E7,
47		0X17E8, 0X17E9, 0X17F0, 0X17F1, 0X17F2, 0X17F3, 0X17F4, 0X17F5, 0X17F6,
48		0X17F7, 0X17F8, 0X17F9, 0X1810, 0X1811, 0X1812, 0X1813, 0X1814, 0X1815,
49		0X1816, 0X1817, 0X1818, 0X1819, 0X1946, 0X1947, 0X1948, 0X1949, 0X194A,
50		0X194B, 0X194C, 0X194D, 0X194E, 0X194F, 0X19D0, 0X19D1, 0X19D2, 0X19D3,
51		0X19D4, 0X19D5, 0X19D6, 0X19D7, 0X19D8, 0X19D9, 0X19DA, 0X1A80, 0X1A81,
52		0X1A82, 0X1A83, 0X1A84, 0X1A85, 0X1A86, 0X1A87, 0X1A88, 0X1A89, 0X1A90,
53		0X1A91, 0X1A92, 0X1A93, 0X1A94, 0X1A95, 0X1A96, 0X1A97, 0X1A98, 0X1A99,
54		0X1B50, 0X1B51, 0X1B52, 0X1B53, 0X1B54, 0X1B55, 0X1B56, 0X1B57, 0X1B58,
55		0X1B59, 0X1BB0, 0X1BB1, 0X1BB2, 0X1BB3, 0X1BB4, 0X1BB5, 0X1BB6, 0X1BB7,
56		0X1BB8, 0X1BB9, 0X1C40, 0X1C41, 0X1C42, 0X1C43, 0X1C44, 0X1C45, 0X1C46,
57		0X1C47, 0X1C48, 0X1C49, 0X1C50, 0X1C51, 0X1C52, 0X1C53, 0X1C54, 0X1C55,
58		0X1C56, 0X1C57, 0X1C58, 0X1C59, 0X2070, 0X2074, 0X2075, 0X2076, 0X2077,
59		0X2078, 0X2079, 0X2080, 0X2081, 0X2082, 0X2083, 0X2084, 0X2085, 0X2086,
60		0X2087, 0X2088, 0X2089, 0X2150, 0X2151, 0X2152, 0X2153, 0X2154, 0X2155,
61		0X2156, 0X2157, 0X2158, 0X2159, 0X215A, 0X215B, 0X215C, 0X215D, 0X215E,
62		0X215F, 0X2160, 0X2161, 0X2162, 0X2163, 0X2164, 0X2165, 0X2166, 0X2167,
63		0X2168, 0X2169, 0X216A, 0X216B, 0X216C, 0X216D, 0X216E, 0X216F, 0X2170,
64		0X2171, 0X2172, 0X2173, 0X2174, 0X2175, 0X2176, 0X2177, 0X2178, 0X2179,
65		0X217A, 0X217B, 0X217C, 0X217D, 0X217E, 0X217F, 0X2180, 0X2181, 0X2182,
66		0X2185, 0X2186, 0X2187, 0X2188, 0X2189, 0X2460, 0X2461, 0X2462, 0X2463,
67		0X2464, 0X2465, 0X2466, 0X2467, 0X2468, 0X2469, 0X246A, 0X246B, 0X246C,
68		0X246D, 0X246E, 0X246F, 0X2470, 0X2471, 0X2472, 0X2473, 0X2474, 0X2475,
69		0X2476, 0X2477, 0X2478, 0X2479, 0X247A, 0X247B, 0X247C, 0X247D, 0X247E,
70		0X247F, 0X2480, 0X2481, 0X2482, 0X2483, 0X2484, 0X2485, 0X2486, 0X2487,
71		0X2488, 0X2489, 0X248A, 0X248B, 0X248C, 0X248D, 0X248E, 0X248F, 0X2490,
72		0X2491, 0X2492, 0X2493, 0X2494, 0X2495, 0X2496, 0X2497, 0X2498, 0X2499,
73		0X249A, 0X249B, 0X24EA, 0X24EB, 0X24EC, 0X24ED, 0X24EE, 0X24EF, 0X24F0,
74		0X24F1, 0X24F2, 0X24F3, 0X24F4, 0X24F5, 0X24F6, 0X24F7, 0X24F8, 0X24F9,
75		0X24FA, 0X24FB, 0X24FC, 0X24FD, 0X24FE, 0X24FF, 0X2776, 0X2777, 0X2778,
76		0X2779, 0X277A, 0X277B, 0X277C, 0X277D, 0X277E, 0X277F, 0X2780, 0X2781,
77		0X2782, 0X2783, 0X2784, 0X2785, 0X2786, 0X2787, 0X2788, 0X2789, 0X278A,
78		0X278B, 0X278C, 0X278D, 0X278E, 0X278F, 0X2790, 0X2791, 0X2792, 0X2793,
79		0X2CFD, 0X3007, 0X3021, 0X3022, 0X3023, 0X3024, 0X3025, 0X3026, 0X3027,
80		0X3028, 0X3029, 0X3038, 0X3039, 0X303A, 0X3192, 0X3193, 0X3194, 0X3195,
81		0X3220, 0X3221, 0X3222, 0X3223, 0X3224, 0X3225, 0X3226, 0X3227, 0X3228,
82		0X3229, 0X3248, 0X3249, 0X324A, 0X324B, 0X324C, 0X324D, 0X324E, 0X324F,
83		0X3251, 0X3252, 0X3253, 0X3254, 0X3255, 0X3256, 0X3257, 0X3258, 0X3259,
84		0X325A, 0X325B, 0X325C, 0X325D, 0X325E, 0X325F, 0X3280, 0X3281, 0X3282,
85		0X3283, 0X3284, 0X3285, 0X3286, 0X3287, 0X3288, 0X3289, 0X32B1, 0X32B2,
86		0X32B3, 0X32B4, 0X32B5, 0X32B6, 0X32B7, 0X32B8, 0X32B9, 0X32BA, 0X32BB,
87		0X32BC, 0X32BD, 0X32BE, 0X32BF, 0X3405, 0X3483, 0X382A, 0X3B4D, 0X4E00,
88		0X4E03, 0X4E07, 0X4E09, 0X4E5D, 0X4E8C, 0X4E94, 0X4E96, 0X4EBF, 0X4EC0,
89		0X4EDF, 0X4EE8, 0X4F0D, 0X4F70, 0X5104, 0X5146, 0X5169, 0X516B, 0X516D,
90		0X5341, 0X5343, 0X5344, 0X5345, 0X534C, 0X53C1, 0X53C2, 0X53C3, 0X53C4,
91		0X56DB, 0X58F1, 0X58F9, 0X5E7A, 0X5EFE, 0X5EFF, 0X5F0C, 0X5F0D, 0X5F0E,
92		0X5F10, 0X62FE, 0X634C, 0X67D2, 0X6F06, 0X7396, 0X767E, 0X8086, 0X842C,
93		0X8CAE, 0X8CB3, 0X8D30, 0X9621, 0X9646, 0X964C, 0X9678, 0X96F6, 0XA620,
94		0XA621, 0XA622, 0XA623, 0XA624, 0XA625, 0XA626, 0XA627, 0XA628, 0XA629,
95		0XA6E6, 0XA6E7, 0XA6E8, 0XA6E9, 0XA6EA, 0XA6EB, 0XA6EC, 0XA6ED, 0XA6EE,
96		0XA6EF, 0XA830, 0XA831, 0XA832, 0XA833, 0XA834, 0XA835, 0XA8D0, 0XA8D1,
97		0XA8D2, 0XA8D3, 0XA8D4, 0XA8D5, 0XA8D6, 0XA8D7, 0XA8D8, 0XA8D9, 0XA900,
98		0XA901, 0XA902, 0XA903, 0XA904, 0XA905, 0XA906, 0XA907, 0XA908, 0XA909,
99		0XA9D0, 0XA9D1, 0XA9D2, 0XA9D3, 0XA9D4, 0XA9D5, 0XA9D6, 0XA9D7, 0XA9D8,
100		0XA9D9, 0XAA50, 0XAA51, 0XAA52, 0XAA53, 0XAA54, 0XAA55, 0XAA56, 0XAA57,
101		0XAA58, 0XAA59, 0XABF0, 0XABF1, 0XABF2, 0XABF3, 0XABF4, 0XABF5, 0XABF6,
102		0XABF7, 0XABF8, 0XABF9, 0XF96B, 0XF973, 0XF978, 0XF9B2, 0XF9D1, 0XF9D3,
103		0XF9FD, 0XFF10, 0XFF11, 0XFF12, 0XFF13, 0XFF14, 0XFF15, 0XFF16, 0XFF17,
104		0XFF18, 0XFF19, 0X10107, 0X10108, 0X10109, 0X1010A, 0X1010B, 0X1010C,
105		0X1010D, 0X1010E, 0X1010F, 0X10110, 0X10111, 0X10112, 0X10113, 0X10114,
106		0X10115, 0X10116, 0X10117, 0X10118, 0X10119, 0X1011A, 0X1011B, 0X1011C,
107		0X1011D, 0X1011E, 0X1011F, 0X10120, 0X10121, 0X10122, 0X10123, 0X10124,
108		0X10125, 0X10126, 0X10127, 0X10128, 0X10129, 0X1012A, 0X1012B, 0X1012C,
109		0X1012D, 0X1012E, 0X1012F, 0X10130, 0X10131, 0X10132, 0X10133, 0X10140,
110		0X10141, 0X10142, 0X10143, 0X10144, 0X10145, 0X10146, 0X10147, 0X10148,
111		0X10149, 0X1014A, 0X1014B, 0X1014C, 0X1014D, 0X1014E, 0X1014F, 0X10150,
112		0X10151, 0X10152, 0X10153, 0X10154, 0X10155, 0X10156, 0X10157, 0X10158,
113		0X10159, 0X1015A, 0X1015B, 0X1015C, 0X1015D, 0X1015E, 0X1015F, 0X10160,
114		0X10161, 0X10162, 0X10163, 0X10164, 0X10165, 0X10166, 0X10167, 0X10168,
115		0X10169, 0X1016A, 0X1016B, 0X1016C, 0X1016D, 0X1016E, 0X1016F, 0X10170,
116		0X10171, 0X10172, 0X10173, 0X10174, 0X10175, 0X10176, 0X10177, 0X10178,
117		0X1018A, 0X10320, 0X10321, 0X10322, 0X10323, 0X10341, 0X1034A, 0X103D1,
118		0X103D2, 0X103D3, 0X103D4, 0X103D5, 0X104A0, 0X104A1, 0X104A2, 0X104A3,
119		0X104A4, 0X104A5, 0X104A6, 0X104A7, 0X104A8, 0X104A9, 0X10858, 0X10859,
120		0X1085A, 0X1085B, 0X1085C, 0X1085D, 0X1085E, 0X1085F, 0X10916, 0X10917,
121		0X10918, 0X10919, 0X1091A, 0X1091B, 0X10A40, 0X10A41, 0X10A42, 0X10A43,
122		0X10A44, 0X10A45, 0X10A46, 0X10A47, 0X10A7D, 0X10A7E, 0X10B58, 0X10B59,
123		0X10B5A, 0X10B5B, 0X10B5C, 0X10B5D, 0X10B5E, 0X10B5F, 0X10B78, 0X10B79,
124		0X10B7A, 0X10B7B, 0X10B7C, 0X10B7D, 0X10B7E, 0X10B7F, 0X10E60, 0X10E61,
125		0X10E62, 0X10E63, 0X10E64, 0X10E65, 0X10E66, 0X10E67, 0X10E68, 0X10E69,
126		0X10E6A, 0X10E6B, 0X10E6C, 0X10E6D, 0X10E6E, 0X10E6F, 0X10E70, 0X10E71,
127		0X10E72, 0X10E73, 0X10E74, 0X10E75, 0X10E76, 0X10E77, 0X10E78, 0X10E79,
128		0X10E7A, 0X10E7B, 0X10E7C, 0X10E7D, 0X10E7E, 0X11052, 0X11053, 0X11054,
129		0X11055, 0X11056, 0X11057, 0X11058, 0X11059, 0X1105A, 0X1105B, 0X1105C,
130		0X1105D, 0X1105E, 0X1105F, 0X11060, 0X11061, 0X11062, 0X11063, 0X11064,
131		0X11065, 0X11066, 0X11067, 0X11068, 0X11069, 0X1106A, 0X1106B, 0X1106C,
132		0X1106D, 0X1106E, 0X1106F, 0X110F0, 0X110F1, 0X110F2, 0X110F3, 0X110F4,
133		0X110F5, 0X110F6, 0X110F7, 0X110F8, 0X110F9, 0X11136, 0X11137, 0X11138,
134		0X11139, 0X1113A, 0X1113B, 0X1113C, 0X1113D, 0X1113E, 0X1113F, 0X111D0,
135		0X111D1, 0X111D2, 0X111D3, 0X111D4, 0X111D5, 0X111D6, 0X111D7, 0X111D8,
136		0X111D9, 0X116C0, 0X116C1, 0X116C2, 0X116C3, 0X116C4, 0X116C5, 0X116C6,
137		0X116C7, 0X116C8, 0X116C9, 0X12400, 0X12401, 0X12402, 0X12403, 0X12404,
138		0X12405, 0X12406, 0X12407, 0X12408, 0X12409, 0X1240A, 0X1240B, 0X1240C,
139		0X1240D, 0X1240E, 0X1240F, 0X12410, 0X12411, 0X12412, 0X12413, 0X12414,
140		0X12415, 0X12416, 0X12417, 0X12418, 0X12419, 0X1241A, 0X1241B, 0X1241C,
141		0X1241D, 0X1241E, 0X1241F, 0X12420, 0X12421, 0X12422, 0X12423, 0X12424,
142		0X12425, 0X12426, 0X12427, 0X12428, 0X12429, 0X1242A, 0X1242B, 0X1242C,
143		0X1242D, 0X1242E, 0X1242F, 0X12430, 0X12431, 0X12432, 0X12433, 0X12434,
144		0X12435, 0X12436, 0X12437, 0X12438, 0X12439, 0X1243A, 0X1243B, 0X1243C,
145		0X1243D, 0X1243E, 0X1243F, 0X12440, 0X12441, 0X12442, 0X12443, 0X12444,
146		0X12445, 0X12446, 0X12447, 0X12448, 0X12449, 0X1244A, 0X1244B, 0X1244C,
147		0X1244D, 0X1244E, 0X1244F, 0X12450, 0X12451, 0X12452, 0X12453, 0X12454,
148		0X12455, 0X12456, 0X12457, 0X12458, 0X12459, 0X1245A, 0X1245B, 0X1245C,
149		0X1245D, 0X1245E, 0X1245F, 0X12460, 0X12461, 0X12462, 0X1D360, 0X1D361,
150		0X1D362, 0X1D363, 0X1D364, 0X1D365, 0X1D366, 0X1D367, 0X1D368, 0X1D369,
151		0X1D36A, 0X1D36B, 0X1D36C, 0X1D36D, 0X1D36E, 0X1D36F, 0X1D370, 0X1D371,
152		0X1D7CE, 0X1D7CF, 0X1D7D0, 0X1D7D1, 0X1D7D2, 0X1D7D3, 0X1D7D4, 0X1D7D5,
153		0X1D7D6, 0X1D7D7, 0X1D7D8, 0X1D7D9, 0X1D7DA, 0X1D7DB, 0X1D7DC, 0X1D7DD,
154		0X1D7DE, 0X1D7DF, 0X1D7E0, 0X1D7E1, 0X1D7E2, 0X1D7E3, 0X1D7E4, 0X1D7E5,
155		0X1D7E6, 0X1D7E7, 0X1D7E8, 0X1D7E9, 0X1D7EA, 0X1D7EB, 0X1D7EC, 0X1D7ED,
156		0X1D7EE, 0X1D7EF, 0X1D7F0, 0X1D7F1, 0X1D7F2, 0X1D7F3, 0X1D7F4, 0X1D7F5,
157		0X1D7F6, 0X1D7F7, 0X1D7F8, 0X1D7F9, 0X1D7FA, 0X1D7FB, 0X1D7FC, 0X1D7FD,
158		0X1D7FE, 0X1D7FF, 0X1F100, 0X1F101, 0X1F102, 0X1F103, 0X1F104, 0X1F105,
159		0X1F106, 0X1F107, 0X1F108, 0X1F109, 0X1F10A, 0X20001, 0X20064, 0X200E2,
160		0X20121, 0X2092A, 0X20983, 0X2098C, 0X2099C, 0X20AEA, 0X20AFD, 0X20B19,
161		0X22390, 0X22998, 0X23B1B, 0X2626D, 0X2F890,
162		]
	21	numeric_hex = (
	22	0XB2, 0XB3, 0XB9, 0XBC, 0XBD, 0XBE, 0X660, 0X661, 0X662,
	23	0X663, 0X664, 0X665, 0X666, 0X667, 0X668, 0X669, 0X6F0,
	24	0X6F1, 0X6F2, 0X6F3, 0X6F4, 0X6F5, 0X6F6, 0X6F7, 0X6F8,
	25	0X6F9, 0X7C0, 0X7C1, 0X7C2, 0X7C3, 0X7C4, 0X7C5, 0X7C6,
	26	0X7C7, 0X7C8, 0X7C9, 0X966, 0X967, 0X968, 0X969, 0X96A,
	27	0X96B, 0X96C, 0X96D, 0X96E, 0X96F, 0X9E6, 0X9E7, 0X9E8,
	28	0X9E9, 0X9EA, 0X9EB, 0X9EC, 0X9ED, 0X9EE, 0X9EF, 0X9F4,
	29	0X9F5, 0X9F6, 0X9F7, 0X9F8, 0X9F9, 0XA66, 0XA67, 0XA68,
	30	0XA69, 0XA6A, 0XA6B, 0XA6C, 0XA6D, 0XA6E, 0XA6F, 0XAE6,
	31	0XAE7, 0XAE8, 0XAE9, 0XAEA, 0XAEB, 0XAEC, 0XAED, 0XAEE,
	32	0XAEF, 0XB66, 0XB67, 0XB68, 0XB69, 0XB6A, 0XB6B, 0XB6C,
	33	0XB6D, 0XB6E, 0XB6F, 0XB72, 0XB73, 0XB74, 0XB75, 0XB76,
	34	0XB77, 0XBE6, 0XBE7, 0XBE8, 0XBE9, 0XBEA, 0XBEB, 0XBEC,
	35	0XBED, 0XBEE, 0XBEF, 0XBF0, 0XBF1, 0XBF2, 0XC66, 0XC67,
	36	0XC68, 0XC69, 0XC6A, 0XC6B, 0XC6C, 0XC6D, 0XC6E, 0XC6F,
	37	0XC78, 0XC79, 0XC7A, 0XC7B, 0XC7C, 0XC7D, 0XC7E, 0XCE6,
	38	0XCE7, 0XCE8, 0XCE9, 0XCEA, 0XCEB, 0XCEC, 0XCED, 0XCEE,
	39	0XCEF, 0XD58, 0XD59, 0XD5A, 0XD5B, 0XD5C, 0XD5D, 0XD5E,
	40	0XD66, 0XD67, 0XD68, 0XD69, 0XD6A, 0XD6B, 0XD6C, 0XD6D,
	41	0XD6E, 0XD6F, 0XD70, 0XD71, 0XD72, 0XD73, 0XD74, 0XD75,
	42	0XD76, 0XD77, 0XD78, 0XDE6, 0XDE7, 0XDE8, 0XDE9, 0XDEA,
	43	0XDEB, 0XDEC, 0XDED, 0XDEE, 0XDEF, 0XE50, 0XE51, 0XE52,
	44	0XE53, 0XE54, 0XE55, 0XE56, 0XE57, 0XE58, 0XE59, 0XED0,
	45	0XED1, 0XED2, 0XED3, 0XED4, 0XED5, 0XED6, 0XED7, 0XED8,
	46	0XED9, 0XF20, 0XF21, 0XF22, 0XF23, 0XF24, 0XF25, 0XF26,
	47	0XF27, 0XF28, 0XF29, 0XF2A, 0XF2B, 0XF2C, 0XF2D, 0XF2E,
	48	0XF2F, 0XF30, 0XF31, 0XF32, 0XF33, 0X1040, 0X1041, 0X1042,
	49	0X1043, 0X1044, 0X1045, 0X1046, 0X1047, 0X1048, 0X1049,
	50	0X1090, 0X1091, 0X1092, 0X1093, 0X1094, 0X1095, 0X1096,
	51	0X1097, 0X1098, 0X1099, 0X1369, 0X136A, 0X136B, 0X136C,
	52	0X136D, 0X136E, 0X136F, 0X1370, 0X1371, 0X1372, 0X1373,
	53	0X1374, 0X1375, 0X1376, 0X1377, 0X1378, 0X1379, 0X137A,
	54	0X137B, 0X137C, 0X16EE, 0X16EF, 0X16F0, 0X17E0, 0X17E1,
	55	0X17E2, 0X17E3, 0X17E4, 0X17E5, 0X17E6, 0X17E7, 0X17E8,
	56	0X17E9, 0X17F0, 0X17F1, 0X17F2, 0X17F3, 0X17F4, 0X17F5,
	57	0X17F6, 0X17F7, 0X17F8, 0X17F9, 0X1810, 0X1811, 0X1812,
	58	0X1813, 0X1814, 0X1815, 0X1816, 0X1817, 0X1818, 0X1819,
	59	0X1946, 0X1947, 0X1948, 0X1949, 0X194A, 0X194B, 0X194C,
	60	0X194D, 0X194E, 0X194F, 0X19D0, 0X19D1, 0X19D2, 0X19D3,
	61	0X19D4, 0X19D5, 0X19D6, 0X19D7, 0X19D8, 0X19D9, 0X19DA,
	62	0X1A80, 0X1A81, 0X1A82, 0X1A83, 0X1A84, 0X1A85, 0X1A86,
	63	0X1A87, 0X1A88, 0X1A89, 0X1A90, 0X1A91, 0X1A92, 0X1A93,
	64	0X1A94, 0X1A95, 0X1A96, 0X1A97, 0X1A98, 0X1A99, 0X1B50,
	65	0X1B51, 0X1B52, 0X1B53, 0X1B54, 0X1B55, 0X1B56, 0X1B57,
	66	0X1B58, 0X1B59, 0X1BB0, 0X1BB1, 0X1BB2, 0X1BB3, 0X1BB4,
	67	0X1BB5, 0X1BB6, 0X1BB7, 0X1BB8, 0X1BB9, 0X1C40, 0X1C41,
	68	0X1C42, 0X1C43, 0X1C44, 0X1C45, 0X1C46, 0X1C47, 0X1C48,
	69	0X1C49, 0X1C50, 0X1C51, 0X1C52, 0X1C53, 0X1C54, 0X1C55,
	70	0X1C56, 0X1C57, 0X1C58, 0X1C59, 0X2070, 0X2074, 0X2075,
	71	0X2076, 0X2077, 0X2078, 0X2079, 0X2080, 0X2081, 0X2082,
	72	0X2083, 0X2084, 0X2085, 0X2086, 0X2087, 0X2088, 0X2089,
	73	0X2150, 0X2151, 0X2152, 0X2153, 0X2154, 0X2155, 0X2156,
	74	0X2157, 0X2158, 0X2159, 0X215A, 0X215B, 0X215C, 0X215D,
	75	0X215E, 0X215F, 0X2160, 0X2161, 0X2162, 0X2163, 0X2164,
	76	0X2165, 0X2166, 0X2167, 0X2168, 0X2169, 0X216A, 0X216B,
	77	0X216C, 0X216D, 0X216E, 0X216F, 0X2170, 0X2171, 0X2172,
	78	0X2173, 0X2174, 0X2175, 0X2176, 0X2177, 0X2178, 0X2179,
	79	0X217A, 0X217B, 0X217C, 0X217D, 0X217E, 0X217F, 0X2180,
	80	0X2181, 0X2182, 0X2185, 0X2186, 0X2187, 0X2188, 0X2189,
	81	0X2460, 0X2461, 0X2462, 0X2463, 0X2464, 0X2465, 0X2466,
	82	0X2467, 0X2468, 0X2469, 0X246A, 0X246B, 0X246C, 0X246D,
	83	0X246E, 0X246F, 0X2470, 0X2471, 0X2472, 0X2473, 0X2474,
	84	0X2475, 0X2476, 0X2477, 0X2478, 0X2479, 0X247A, 0X247B,
	85	0X247C, 0X247D, 0X247E, 0X247F, 0X2480, 0X2481, 0X2482,
	86	0X2483, 0X2484, 0X2485, 0X2486, 0X2487, 0X2488, 0X2489,
	87	0X248A, 0X248B, 0X248C, 0X248D, 0X248E, 0X248F, 0X2490,
	88	0X2491, 0X2492, 0X2493, 0X2494, 0X2495, 0X2496, 0X2497,
	89	0X2498, 0X2499, 0X249A, 0X249B, 0X24EA, 0X24EB, 0X24EC,
	90	0X24ED, 0X24EE, 0X24EF, 0X24F0, 0X24F1, 0X24F2, 0X24F3,
	91	0X24F4, 0X24F5, 0X24F6, 0X24F7, 0X24F8, 0X24F9, 0X24FA,
	92	0X24FB, 0X24FC, 0X24FD, 0X24FE, 0X24FF, 0X2776, 0X2777,
	93	0X2778, 0X2779, 0X277A, 0X277B, 0X277C, 0X277D, 0X277E,
	94	0X277F, 0X2780, 0X2781, 0X2782, 0X2783, 0X2784, 0X2785,
	95	0X2786, 0X2787, 0X2788, 0X2789, 0X278A, 0X278B, 0X278C,
	96	0X278D, 0X278E, 0X278F, 0X2790, 0X2791, 0X2792, 0X2793,
	97	0X2CFD, 0X3007, 0X3021, 0X3022, 0X3023, 0X3024, 0X3025,
	98	0X3026, 0X3027, 0X3028, 0X3029, 0X3038, 0X3039, 0X303A,
	99	0X3192, 0X3193, 0X3194, 0X3195, 0X3220, 0X3221, 0X3222,
	100	0X3223, 0X3224, 0X3225, 0X3226, 0X3227, 0X3228, 0X3229,
	101	0X3248, 0X3249, 0X324A, 0X324B, 0X324C, 0X324D, 0X324E,
	102	0X324F, 0X3251, 0X3252, 0X3253, 0X3254, 0X3255, 0X3256,
	103	0X3257, 0X3258, 0X3259, 0X325A, 0X325B, 0X325C, 0X325D,
	104	0X325E, 0X325F, 0X3280, 0X3281, 0X3282, 0X3283, 0X3284,
	105	0X3285, 0X3286, 0X3287, 0X3288, 0X3289, 0X32B1, 0X32B2,
	106	0X32B3, 0X32B4, 0X32B5, 0X32B6, 0X32B7, 0X32B8, 0X32B9,
	107	0X32BA, 0X32BB, 0X32BC, 0X32BD, 0X32BE, 0X32BF, 0X3405,
	108	0X3483, 0X382A, 0X3B4D, 0X4E00, 0X4E03, 0X4E07, 0X4E09,
	109	0X4E5D, 0X4E8C, 0X4E94, 0X4E96, 0X4EBF, 0X4EC0, 0X4EDF,
	110	0X4EE8, 0X4F0D, 0X4F70, 0X5104, 0X5146, 0X5169, 0X516B,
	111	0X516D, 0X5341, 0X5343, 0X5344, 0X5345, 0X534C, 0X53C1,
	112	0X53C2, 0X53C3, 0X53C4, 0X56DB, 0X58F1, 0X58F9, 0X5E7A,
	113	0X5EFE, 0X5EFF, 0X5F0C, 0X5F0D, 0X5F0E, 0X5F10, 0X62FE,
	114	0X634C, 0X67D2, 0X6F06, 0X7396, 0X767E, 0X8086, 0X842C,
	115	0X8CAE, 0X8CB3, 0X8D30, 0X9621, 0X9646, 0X964C, 0X9678,
	116	0X96F6, 0XA620, 0XA621, 0XA622, 0XA623, 0XA624, 0XA625,
	117	0XA626, 0XA627, 0XA628, 0XA629, 0XA6E6, 0XA6E7, 0XA6E8,
	118	0XA6E9, 0XA6EA, 0XA6EB, 0XA6EC, 0XA6ED, 0XA6EE, 0XA6EF,
	119	0XA830, 0XA831, 0XA832, 0XA833, 0XA834, 0XA835, 0XA8D0,
	120	0XA8D1, 0XA8D2, 0XA8D3, 0XA8D4, 0XA8D5, 0XA8D6, 0XA8D7,
	121	0XA8D8, 0XA8D9, 0XA900, 0XA901, 0XA902, 0XA903, 0XA904,
	122	0XA905, 0XA906, 0XA907, 0XA908, 0XA909, 0XA9D0, 0XA9D1,
	123	0XA9D2, 0XA9D3, 0XA9D4, 0XA9D5, 0XA9D6, 0XA9D7, 0XA9D8,
	124	0XA9D9, 0XA9F0, 0XA9F1, 0XA9F2, 0XA9F3, 0XA9F4, 0XA9F5,
	125	0XA9F6, 0XA9F7, 0XA9F8, 0XA9F9, 0XAA50, 0XAA51, 0XAA52,
	126	0XAA53, 0XAA54, 0XAA55, 0XAA56, 0XAA57, 0XAA58, 0XAA59,
	127	0XABF0, 0XABF1, 0XABF2, 0XABF3, 0XABF4, 0XABF5, 0XABF6,
	128	0XABF7, 0XABF8, 0XABF9, 0XF96B, 0XF973, 0XF978, 0XF9B2,
	129	0XF9D1, 0XF9D3, 0XF9FD, 0XFF10, 0XFF11, 0XFF12, 0XFF13,
	130	0XFF14, 0XFF15, 0XFF16, 0XFF17, 0XFF18, 0XFF19, 0X10107,
	131	0X10108, 0X10109, 0X1010A, 0X1010B, 0X1010C, 0X1010D,
	132	0X1010E, 0X1010F, 0X10110, 0X10111, 0X10112, 0X10113,
	133	0X10114, 0X10115, 0X10116, 0X10117, 0X10118, 0X10119,
	134	0X1011A, 0X1011B, 0X1011C, 0X1011D, 0X1011E, 0X1011F,
	135	0X10120, 0X10121, 0X10122, 0X10123, 0X10124, 0X10125,
	136	0X10126, 0X10127, 0X10128, 0X10129, 0X1012A, 0X1012B,
	137	0X1012C, 0X1012D, 0X1012E, 0X1012F, 0X10130, 0X10131,
	138	0X10132, 0X10133, 0X10140, 0X10141, 0X10142, 0X10143,
	139	0X10144, 0X10145, 0X10146, 0X10147, 0X10148, 0X10149,
	140	0X1014A, 0X1014B, 0X1014C, 0X1014D, 0X1014E, 0X1014F,
	141	0X10150, 0X10151, 0X10152, 0X10153, 0X10154, 0X10155,
	142	0X10156, 0X10157, 0X10158, 0X10159, 0X1015A, 0X1015B,
	143	0X1015C, 0X1015D, 0X1015E, 0X1015F, 0X10160, 0X10161,
	144	0X10162, 0X10163, 0X10164, 0X10165, 0X10166, 0X10167,
	145	0X10168, 0X10169, 0X1016A, 0X1016B, 0X1016C, 0X1016D,
	146	0X1016E, 0X1016F, 0X10170, 0X10171, 0X10172, 0X10173,
	147	0X10174, 0X10175, 0X10176, 0X10177, 0X10178, 0X1018A,
	148	0X1018B, 0X102E1, 0X102E2, 0X102E3, 0X102E4, 0X102E5,
	149	0X102E6, 0X102E7, 0X102E8, 0X102E9, 0X102EA, 0X102EB,
	150	0X102EC, 0X102ED, 0X102EE, 0X102EF, 0X102F0, 0X102F1,
	151	0X102F2, 0X102F3, 0X102F4, 0X102F5, 0X102F6, 0X102F7,
	152	0X102F8, 0X102F9, 0X102FA, 0X102FB, 0X10320, 0X10321,
	153	0X10322, 0X10323, 0X10341, 0X1034A, 0X103D1, 0X103D2,
	154	0X103D3, 0X103D4, 0X103D5, 0X104A0, 0X104A1, 0X104A2,
	155	0X104A3, 0X104A4, 0X104A5, 0X104A6, 0X104A7, 0X104A8,
	156	0X104A9, 0X10858, 0X10859, 0X1085A, 0X1085B, 0X1085C,
	157	0X1085D, 0X1085E, 0X1085F, 0X10879, 0X1087A, 0X1087B,
	158	0X1087C, 0X1087D, 0X1087E, 0X1087F, 0X108A7, 0X108A8,
	159	0X108A9, 0X108AA, 0X108AB, 0X108AC, 0X108AD, 0X108AE,
	160	0X108AF, 0X108FB, 0X108FC, 0X108FD, 0X108FE, 0X108FF,
	161	0X10916, 0X10917, 0X10918, 0X10919, 0X1091A, 0X1091B,
	162	0X109BC, 0X109BD, 0X109C0, 0X109C1, 0X109C2, 0X109C3,
	163	0X109C4, 0X109C5, 0X109C6, 0X109C7, 0X109C8, 0X109C9,
	164	0X109CA, 0X109CB, 0X109CC, 0X109CD, 0X109CE, 0X109CF,
	165	0X109D2, 0X109D3, 0X109D4, 0X109D5, 0X109D6, 0X109D7,
	166	0X109D8, 0X109D9, 0X109DA, 0X109DB, 0X109DC, 0X109DD,
	167	0X109DE, 0X109DF, 0X109E0, 0X109E1, 0X109E2, 0X109E3,
	168	0X109E4, 0X109E5, 0X109E6, 0X109E7, 0X109E8, 0X109E9,
	169	0X109EA, 0X109EB, 0X109EC, 0X109ED, 0X109EE, 0X109EF,
	170	0X109F0, 0X109F1, 0X109F2, 0X109F3, 0X109F4, 0X109F5,
	171	0X109F6, 0X109F7, 0X109F8, 0X109F9, 0X109FA, 0X109FB,
	172	0X109FC, 0X109FD, 0X109FE, 0X109FF, 0X10A40, 0X10A41,
	173	0X10A42, 0X10A43, 0X10A44, 0X10A45, 0X10A46, 0X10A47,
	174	0X10A48, 0X10A7D, 0X10A7E, 0X10A9D, 0X10A9E, 0X10A9F,
	175	0X10AEB, 0X10AEC, 0X10AED, 0X10AEE, 0X10AEF, 0X10B58,
	176	0X10B59, 0X10B5A, 0X10B5B, 0X10B5C, 0X10B5D, 0X10B5E,
	177	0X10B5F, 0X10B78, 0X10B79, 0X10B7A, 0X10B7B, 0X10B7C,
	178	0X10B7D, 0X10B7E, 0X10B7F, 0X10BA9, 0X10BAA, 0X10BAB,
	179	0X10BAC, 0X10BAD, 0X10BAE, 0X10BAF, 0X10CFA, 0X10CFB,
	180	0X10CFC, 0X10CFD, 0X10CFE, 0X10CFF, 0X10D30, 0X10D31,
	181	0X10D32, 0X10D33, 0X10D34, 0X10D35, 0X10D36, 0X10D37,
	182	0X10D38, 0X10D39, 0X10E60, 0X10E61, 0X10E62, 0X10E63,
	183	0X10E64, 0X10E65, 0X10E66, 0X10E67, 0X10E68, 0X10E69,
	184	0X10E6A, 0X10E6B, 0X10E6C, 0X10E6D, 0X10E6E, 0X10E6F,
	185	0X10E70, 0X10E71, 0X10E72, 0X10E73, 0X10E74, 0X10E75,
	186	0X10E76, 0X10E77, 0X10E78, 0X10E79, 0X10E7A, 0X10E7B,
	187	0X10E7C, 0X10E7D, 0X10E7E, 0X10F1D, 0X10F1E, 0X10F1F,
	188	0X10F20, 0X10F21, 0X10F22, 0X10F23, 0X10F24, 0X10F25,
	189	0X10F26, 0X10F51, 0X10F52, 0X10F53, 0X10F54, 0X11052,
	190	0X11053, 0X11054, 0X11055, 0X11056, 0X11057, 0X11058,
	191	0X11059, 0X1105A, 0X1105B, 0X1105C, 0X1105D, 0X1105E,
	192	0X1105F, 0X11060, 0X11061, 0X11062, 0X11063, 0X11064,
	193	0X11065, 0X11066, 0X11067, 0X11068, 0X11069, 0X1106A,
	194	0X1106B, 0X1106C, 0X1106D, 0X1106E, 0X1106F, 0X110F0,
	195	0X110F1, 0X110F2, 0X110F3, 0X110F4, 0X110F5, 0X110F6,
	196	0X110F7, 0X110F8, 0X110F9, 0X11136, 0X11137, 0X11138,
	197	0X11139, 0X1113A, 0X1113B, 0X1113C, 0X1113D, 0X1113E,
	198	0X1113F, 0X111D0, 0X111D1, 0X111D2, 0X111D3, 0X111D4,
	199	0X111D5, 0X111D6, 0X111D7, 0X111D8, 0X111D9, 0X111E1,
	200	0X111E2, 0X111E3, 0X111E4, 0X111E5, 0X111E6, 0X111E7,
	201	0X111E8, 0X111E9, 0X111EA, 0X111EB, 0X111EC, 0X111ED,
	202	0X111EE, 0X111EF, 0X111F0, 0X111F1, 0X111F2, 0X111F3,
	203	0X111F4, 0X112F0, 0X112F1, 0X112F2, 0X112F3, 0X112F4,
	204	0X112F5, 0X112F6, 0X112F7, 0X112F8, 0X112F9, 0X11450,
	205	0X11451, 0X11452, 0X11453, 0X11454, 0X11455, 0X11456,
	206	0X11457, 0X11458, 0X11459, 0X114D0, 0X114D1, 0X114D2,
	207	0X114D3, 0X114D4, 0X114D5, 0X114D6, 0X114D7, 0X114D8,
	208	0X114D9, 0X11650, 0X11651, 0X11652, 0X11653, 0X11654,
	209	0X11655, 0X11656, 0X11657, 0X11658, 0X11659, 0X116C0,
	210	0X116C1, 0X116C2, 0X116C3, 0X116C4, 0X116C5, 0X116C6,
	211	0X116C7, 0X116C8, 0X116C9, 0X11730, 0X11731, 0X11732,
	212	0X11733, 0X11734, 0X11735, 0X11736, 0X11737, 0X11738,
	213	0X11739, 0X1173A, 0X1173B, 0X118E0, 0X118E1, 0X118E2,
	214	0X118E3, 0X118E4, 0X118E5, 0X118E6, 0X118E7, 0X118E8,
	215	0X118E9, 0X118EA, 0X118EB, 0X118EC, 0X118ED, 0X118EE,
	216	0X118EF, 0X118F0, 0X118F1, 0X118F2, 0X11C50, 0X11C51,
	217	0X11C52, 0X11C53, 0X11C54, 0X11C55, 0X11C56, 0X11C57,
	218	0X11C58, 0X11C59, 0X11C5A, 0X11C5B, 0X11C5C, 0X11C5D,
	219	0X11C5E, 0X11C5F, 0X11C60, 0X11C61, 0X11C62, 0X11C63,
	220	0X11C64, 0X11C65, 0X11C66, 0X11C67, 0X11C68, 0X11C69,
	221	0X11C6A, 0X11C6B, 0X11C6C, 0X11D50, 0X11D51, 0X11D52,
	222	0X11D53, 0X11D54, 0X11D55, 0X11D56, 0X11D57, 0X11D58,
	223	0X11D59, 0X11DA0, 0X11DA1, 0X11DA2, 0X11DA3, 0X11DA4,
	224	0X11DA5, 0X11DA6, 0X11DA7, 0X11DA8, 0X11DA9, 0X12400,
	225	0X12401, 0X12402, 0X12403, 0X12404, 0X12405, 0X12406,
	226	0X12407, 0X12408, 0X12409, 0X1240A, 0X1240B, 0X1240C,
	227	0X1240D, 0X1240E, 0X1240F, 0X12410, 0X12411, 0X12412,
	228	0X12413, 0X12414, 0X12415, 0X12416, 0X12417, 0X12418,
	229	0X12419, 0X1241A, 0X1241B, 0X1241C, 0X1241D, 0X1241E,
	230	0X1241F, 0X12420, 0X12421, 0X12422, 0X12423, 0X12424,
	231	0X12425, 0X12426, 0X12427, 0X12428, 0X12429, 0X1242A,
	232	0X1242B, 0X1242C, 0X1242D, 0X1242E, 0X1242F, 0X12430,
	233	0X12431, 0X12432, 0X12433, 0X12434, 0X12435, 0X12436,
	234	0X12437, 0X12438, 0X12439, 0X1243A, 0X1243B, 0X1243C,
	235	0X1243D, 0X1243E, 0X1243F, 0X12440, 0X12441, 0X12442,
	236	0X12443, 0X12444, 0X12445, 0X12446, 0X12447, 0X12448,
	237	0X12449, 0X1244A, 0X1244B, 0X1244C, 0X1244D, 0X1244E,
	238	0X1244F, 0X12450, 0X12451, 0X12452, 0X12453, 0X12454,
	239	0X12455, 0X12456, 0X12457, 0X12458, 0X12459, 0X1245A,
	240	0X1245B, 0X1245C, 0X1245D, 0X1245E, 0X1245F, 0X12460,
	241	0X12461, 0X12462, 0X12463, 0X12464, 0X12465, 0X12466,
	242	0X12467, 0X12468, 0X12469, 0X1246A, 0X1246B, 0X1246C,
	243	0X1246D, 0X1246E, 0X16A60, 0X16A61, 0X16A62, 0X16A63,
	244	0X16A64, 0X16A65, 0X16A66, 0X16A67, 0X16A68, 0X16A69,
	245	0X16B50, 0X16B51, 0X16B52, 0X16B53, 0X16B54, 0X16B55,
	246	0X16B56, 0X16B57, 0X16B58, 0X16B59, 0X16B5B, 0X16B5C,
	247	0X16B5D, 0X16B5E, 0X16B5F, 0X16B60, 0X16B61, 0X16E80,
	248	0X16E81, 0X16E82, 0X16E83, 0X16E84, 0X16E85, 0X16E86,
	249	0X16E87, 0X16E88, 0X16E89, 0X16E8A, 0X16E8B, 0X16E8C,
	250	0X16E8D, 0X16E8E, 0X16E8F, 0X16E90, 0X16E91, 0X16E92,
	251	0X16E93, 0X16E94, 0X16E95, 0X16E96, 0X1D2E0, 0X1D2E1,
	252	0X1D2E2, 0X1D2E3, 0X1D2E4, 0X1D2E5, 0X1D2E6, 0X1D2E7,
	253	0X1D2E8, 0X1D2E9, 0X1D2EA, 0X1D2EB, 0X1D2EC, 0X1D2ED,
	254	0X1D2EE, 0X1D2EF, 0X1D2F0, 0X1D2F1, 0X1D2F2, 0X1D2F3,
	255	0X1D360, 0X1D361, 0X1D362, 0X1D363, 0X1D364, 0X1D365,
	256	0X1D366, 0X1D367, 0X1D368, 0X1D369, 0X1D36A, 0X1D36B,
	257	0X1D36C, 0X1D36D, 0X1D36E, 0X1D36F, 0X1D370, 0X1D371,
	258	0X1D372, 0X1D373, 0X1D374, 0X1D375, 0X1D376, 0X1D377,
	259	0X1D378, 0X1D7CE, 0X1D7CF, 0X1D7D0, 0X1D7D1, 0X1D7D2,
	260	0X1D7D3, 0X1D7D4, 0X1D7D5, 0X1D7D6, 0X1D7D7, 0X1D7D8,
	261	0X1D7D9, 0X1D7DA, 0X1D7DB, 0X1D7DC, 0X1D7DD, 0X1D7DE,
	262	0X1D7DF, 0X1D7E0, 0X1D7E1, 0X1D7E2, 0X1D7E3, 0X1D7E4,
	263	0X1D7E5, 0X1D7E6, 0X1D7E7, 0X1D7E8, 0X1D7E9, 0X1D7EA,
	264	0X1D7EB, 0X1D7EC, 0X1D7ED, 0X1D7EE, 0X1D7EF, 0X1D7F0,
	265	0X1D7F1, 0X1D7F2, 0X1D7F3, 0X1D7F4, 0X1D7F5, 0X1D7F6,
	266	0X1D7F7, 0X1D7F8, 0X1D7F9, 0X1D7FA, 0X1D7FB, 0X1D7FC,
	267	0X1D7FD, 0X1D7FE, 0X1D7FF, 0X1E8C7, 0X1E8C8, 0X1E8C9,
	268	0X1E8CA, 0X1E8CB, 0X1E8CC, 0X1E8CD, 0X1E8CE, 0X1E8CF,
	269	0X1E950, 0X1E951, 0X1E952, 0X1E953, 0X1E954, 0X1E955,
	270	0X1E956, 0X1E957, 0X1E958, 0X1E959, 0X1EC71, 0X1EC72,
	271	0X1EC73, 0X1EC74, 0X1EC75, 0X1EC76, 0X1EC77, 0X1EC78,
	272	0X1EC79, 0X1EC7A, 0X1EC7B, 0X1EC7C, 0X1EC7D, 0X1EC7E,
	273	0X1EC7F, 0X1EC80, 0X1EC81, 0X1EC82, 0X1EC83, 0X1EC84,
	274	0X1EC85, 0X1EC86, 0X1EC87, 0X1EC88, 0X1EC89, 0X1EC8A,
	275	0X1EC8B, 0X1EC8C, 0X1EC8D, 0X1EC8E, 0X1EC8F, 0X1EC90,
	276	0X1EC91, 0X1EC92, 0X1EC93, 0X1EC94, 0X1EC95, 0X1EC96,
	277	0X1EC97, 0X1EC98, 0X1EC99, 0X1EC9A, 0X1EC9B, 0X1EC9C,
	278	0X1EC9D, 0X1EC9E, 0X1EC9F, 0X1ECA0, 0X1ECA1, 0X1ECA2,
	279	0X1ECA3, 0X1ECA4, 0X1ECA5, 0X1ECA6, 0X1ECA7, 0X1ECA8,
	280	0X1ECA9, 0X1ECAA, 0X1ECAB, 0X1ECAD, 0X1ECAE, 0X1ECAF,
	281	0X1ECB1, 0X1ECB2, 0X1ECB3, 0X1ECB4, 0X1F100, 0X1F101,
	282	0X1F102, 0X1F103, 0X1F104, 0X1F105, 0X1F106, 0X1F107,
	283	0X1F108, 0X1F109, 0X1F10A, 0X1F10B, 0X1F10C, 0X20001,
	284	0X20064, 0X200E2, 0X20121, 0X2092A, 0X20983, 0X2098C,
	285	0X2099C, 0X20AEA, 0X20AFD, 0X20B19, 0X22390, 0X22998,
	286	0X23B1B, 0X2626D, 0X2F890,
	287	)
163	288
164	289	# Convert each hex into the literal Unicode character.
165	290	# Stop if a ValueError is raised in case of a narrow Unicode build.

169	294	for a in numeric_hex:
170	295	try:
171	296	l = py23_unichr(a)
172		except ValueError:
	297	except ValueError: # pragma: no cover
173	298	break
174	299	if unicodedata.numeric(l, None) is None:
175		continue
	300	continue # pragma: no cover
176	301	numeric_chars.append(l)
177	302
178	303	# The digit characters are a subset of the numerals.
179	304	digit_chars = [a for a in numeric_chars
180	305	if unicodedata.digit(a, None) is not None]
181	306
	307	# The decimal characters are a subset of the numerals
	308	# (probably of the digits, but let's be safe).
	309	decimal_chars = [a for a in numeric_chars
	310	if unicodedata.decimal(a, None) is not None]
	311
182	312	# Create a single string with the above data.
	313	decimals = ''.join(decimal_chars)
183	314	digits = ''.join(digit_chars)
184	315	numeric = ''.join(numeric_chars)
	316	digits_no_decimals = ''.join([x for x in digits if x not in decimals])
	317	numeric_no_decimals = ''.join([x for x in numeric if x not in decimals])
	318
	319	# Some code that can be used to create the above list of hex numbers.
	320	if __name__ == '__main__':
	321	import textwrap
	322	from natsort.compat.py23 import py23_range
	323
	324	hex_chars = []
	325	for i in py23_range(0X110000):
	326	try:
	327	a = py23_unichr(i)
	328	except ValueError:
	329	break
	330	if a in set('0123456789'):
	331	continue
	332	if unicodedata.numeric(a, None) is not None:
	333	hex_chars.append(i)
	334
	335	hex_string = ', '.join(['0X{:X}'.format(i) for i in hex_chars])
	336	for line in textwrap.wrap(hex_string, width=60):
	337	print(' ', line)

+443

-302

natsort/utils.py less more

1	1	"""
2	2	Utilities and definitions for natsort, mostly all used to define
3	3	the _natsort_key function.
	4
	5	SOME CONVENTIONS USED IN THIS FILE.
	6
	7	1 - Factory Functions
	8
	9	Most of the logic of natsort revolves around factory functions
	10	that create branchless transformation functions. For example, rather
	11	than making a string transformation function that has an if
	12	statement to determine whether or not to perform .lower() at
	13	runtime for each element to transform, there is a string transformation
	14	factory function that will return a function that either calls
	15	.lower() or does nothing. In this way, all the branches and
	16	decisions are taken care of once, up front. In addition to a slight
	17	speed improvement, this provides a more extensible infrastructure.
	18
	19	Each of these factory functions will end with the suffix "_factory"
	20	to indicate that they themselves return a function.
	21
	22	2 - Keyword Parameters For Local Scope
	23
	24	Many of the closures that are created by the factory functions
	25	have signatures similar to the following
	26
	27	>>> def factory(parameter):
	28	... val = 'yes' if parameter else 'no'
	29	... def closure(x, val=val):
	30	... return '{} {}'.format(val, x)
	31	... return closure
	32	...
	33
	34	The variable value is passed as the default to a keyword argument.
	35	This is a micro-optimization
	36	that ensures "val" is a local variable instead of a global variable
	37	and thus has a slightly improved performance at runtime.
	38
4	39	"""
5	40	from __future__ import (
6	41	print_function,

11	46
12	47	# Std. lib imports.
13	48	import re
14		from math import isnan
15	49	from warnings import warn
16		from os import curdir, pardir
17		from os.path import split, splitext
18		from itertools import islice
19		from locale import localeconv
	50	from os import curdir as os_curdir, pardir as os_pardir
	51	from os.path import split as path_split, splitext as path_splitext
	52	from itertools import chain as ichain
	53	from collections import deque
	54	from functools import partial, reduce
	55	from operator import methodcaller
	56	from unicodedata import normalize
20	57
21	58	# Local imports.
22		from natsort.ns_enum import ns, _ns
23		from natsort.unicode_numbers import digits, numeric
24		from natsort.locale_help import locale_convert, grouper
	59	from natsort.ns_enum import ns
	60	from natsort.unicode_numbers import numeric_no_decimals, digits_no_decimals
25	61	from natsort.compat.pathlib import PurePath, has_pathlib
	62	from natsort.compat.locale import (
	63	get_strxfrm,
	64	get_thousands_sep,
	65	get_decimal_point,
	66	)
26	67	from natsort.compat.py23 import (
27	68	py23_str,
28		py23_zip,
	69	py23_map,
	70	py23_filter,
29	71	PY_VERSION,
30		)
31		from natsort.compat.locale import (
32		dumb_sort,
33		use_pyicu,
34		null_string,
	72	NEWPY,
35	73	)
36	74	from natsort.compat.fastnumbers import (
37	75	fast_float,
38	76	fast_int,
39		isint,
40		isfloat,
41	77	)
42
43		# Group algorithm types for easy extraction
44		_NUMBER_ALGORITHMS = ns.FLOAT \| ns.INT \| ns.UNSIGNED \| ns.SIGNED \| ns.NOEXP
45		_ALL_BUT_PATH = (ns.F \| ns.I \| ns.U \| ns.S \| ns.N \| ns.L \|
46		ns.IC \| ns.LF \| ns.G \| ns.UG \| ns.TYPESAFE)
	78	if PY_VERSION >= 3:
	79	long = int
47	80
48	81	# The regex that locates floats - include Unicode numerals.
49		_float_sign_exp_re = r'([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?\|[{0}])'
50		_float_sign_exp_re = _float_sign_exp_re.format(numeric)
	82	_nnd = numeric_no_decimals
	83	_exp = r'(?:[eE][-+]?\d+)?'
	84	_num = r'(?:\d+\.?\d*\|\.\d+)'
	85	_float_sign_exp_re = r'([-+]?{0}{1}\|[{2}])'
	86	_float_sign_exp_re = _float_sign_exp_re.format(_num, _exp, _nnd)
51	87	_float_sign_exp_re = re.compile(_float_sign_exp_re, flags=re.U)
52		_float_nosign_exp_re = r'([0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?\|[{0}])'
53		_float_nosign_exp_re = _float_nosign_exp_re.format(numeric)
	88	_float_nosign_exp_re = r'({0}{1}\|[{2}])'
	89	_float_nosign_exp_re = _float_nosign_exp_re.format(_num, _exp, _nnd)
54	90	_float_nosign_exp_re = re.compile(_float_nosign_exp_re, flags=re.U)
55		_float_sign_noexp_re = r'([-+]?[0-9]*\.?[0-9]+\|[{0}])'
56		_float_sign_noexp_re = _float_sign_noexp_re.format(numeric)
	91	_float_sign_noexp_re = r'([-+]?{0}\|[{1}])'
	92	_float_sign_noexp_re = _float_sign_noexp_re.format(_num, _nnd)
57	93	_float_sign_noexp_re = re.compile(_float_sign_noexp_re, flags=re.U)
58		_float_nosign_noexp_re = r'([0-9]*\.?[0-9]+\|[{0}])'
59		_float_nosign_noexp_re = _float_nosign_noexp_re.format(numeric)
	94	_float_nosign_noexp_re = r'({0}\|[{1}])'
	95	_float_nosign_noexp_re = _float_nosign_noexp_re.format(_num, _nnd)
60	96	_float_nosign_noexp_re = re.compile(_float_nosign_noexp_re, flags=re.U)
61		_float_sign_exp_re_c = r'([-+]?[0-9]*[.,]?[0-9]+(?:[eE][-+]?[0-9]+)?)\|[{0}]'
62		_float_sign_exp_re_c = _float_sign_exp_re_c.format(numeric)
63		_float_sign_exp_re_c = re.compile(_float_sign_exp_re_c, flags=re.U)
64		_float_nosign_exp_re_c = r'([0-9]*[.,]?[0-9]+(?:[eE][-+]?[0-9]+)?\|[{0}])'
65		_float_nosign_exp_re_c = _float_nosign_exp_re_c.format(numeric)
66		_float_nosign_exp_re_c = re.compile(_float_nosign_exp_re_c, flags=re.U)
67		_float_sign_noexp_re_c = r'([-+]?[0-9]*[.,]?[0-9]+\|[{0}])'
68		_float_sign_noexp_re_c = _float_sign_noexp_re_c.format(numeric)
69		_float_sign_noexp_re_c = re.compile(_float_sign_noexp_re_c, flags=re.U)
70		_float_nosign_noexp_re_c = r'([0-9]*[.,]?[0-9]+\|[{0}])'
71		_float_nosign_noexp_re_c = _float_nosign_noexp_re_c.format(numeric)
72		_float_nosign_noexp_re_c = re.compile(_float_nosign_noexp_re_c, flags=re.U)
73	97
74	98	# Integer regexes - include Unicode digits.
75		_int_nosign_re = r'([0-9]+\|[{0}])'.format(digits)
	99	_int_nosign_re = r'(\d+\|[{0}])'.format(digits_no_decimals)
76	100	_int_nosign_re = re.compile(_int_nosign_re, flags=re.U)
77		_int_sign_re = r'([-+]?[0-9]+\|[{0}])'.format(digits)
	101	_int_sign_re = r'([-+]?\d+\|[{0}])'.format(digits_no_decimals)
78	102	_int_sign_re = re.compile(_int_sign_re, flags=re.U)
79	103
80	104	# This dict will help select the correct regex and number conversion function.
81		_regex_and_num_function_chooser = {
82		(ns.F \| ns.S, '.'): (_float_sign_exp_re, fast_float),
83		(ns.F \| ns.S \| ns.N, '.'): (_float_sign_noexp_re, fast_float),
84		(ns.F \| ns.U, '.'): (_float_nosign_exp_re, fast_float),
85		(ns.F \| ns.U \| ns.N, '.'): (_float_nosign_noexp_re, fast_float),
86		(ns.I \| ns.S, '.'): (_int_sign_re, fast_int),
87		(ns.I \| ns.S \| ns.N, '.'): (_int_sign_re, fast_int),
88		(ns.I \| ns.U, '.'): (_int_nosign_re, fast_int),
89		(ns.I \| ns.U \| ns.N, '.'): (_int_nosign_re, fast_int),
90		(ns.F \| ns.S, ','): (_float_sign_exp_re_c, fast_float),
91		(ns.F \| ns.S \| ns.N, ','): (_float_sign_noexp_re_c, fast_float),
92		(ns.F \| ns.U, ','): (_float_nosign_exp_re_c, fast_float),
93		(ns.F \| ns.U \| ns.N, ','): (_float_nosign_noexp_re_c, fast_float),
94		(ns.I \| ns.S, ','): (_int_sign_re, fast_int),
95		(ns.I \| ns.S \| ns.N, ','): (_int_sign_re, fast_int),
96		(ns.I \| ns.U, ','): (_int_nosign_re, fast_int),
97		(ns.I \| ns.U \| ns.N, ','): (_int_nosign_re, fast_int),
	105	_regex_chooser = {
	106	(ns.F \| ns.S): _float_sign_exp_re,
	107	(ns.F \| ns.S \| ns.N): _float_sign_noexp_re,
	108	(ns.F \| ns.U): _float_nosign_exp_re,
	109	(ns.F \| ns.U \| ns.N): _float_nosign_noexp_re,
	110	(ns.I \| ns.S): _int_sign_re,
	111	(ns.I \| ns.S \| ns.N): _int_sign_re,
	112	(ns.I \| ns.U): _int_nosign_re,
	113	(ns.I \| ns.U \| ns.N): _int_nosign_re,
98	114	}
99	115
100		# Dict to select checker function from converter function
101		_conv_to_check = {fast_float: isfloat, fast_int: isint}
	116
	117	def _no_op(x):
	118	"""A function that does nothing."""
	119	return x
	120
	121
	122	def _normalize_input_factory(alg):
	123	"""Create a function that will normalize unicode input data."""
	124	normalization_form = 'NFKD' if alg & ns.COMPATIBILITYNORMALIZE else 'NFD'
	125
	126	if NEWPY:
	127	return partial(normalize, normalization_form)
	128	else:
	129	def func(x):
	130	"""Normalize unicode input."""
	131	if isinstance(x, py23_str): # unicode
	132	return normalize(normalization_form, x)
	133	else:
	134	return x
	135	return func
	136
	137
	138	def _natsort_key(val, key, string_func, bytes_func, num_func):
	139	"""\
	140	Key to sort strings and numbers naturally.
	141
	142	It works by separating out the numbers from the strings. This function is for
	143	internal use only. See the natsort_keygen documentation for details of each
	144	parameter.
	145
	146	Parameters
	147	----------
	148	val : str \| unicode
	149	key : callable \| None
	150	string_func : callable
	151	bytes_func : callable
	152	num_func : callable
	153
	154	Returns
	155	-------
	156	out : tuple
	157	The modified value with numbers extracted.
	158
	159	"""
	160
	161	# Apply key if needed
	162	if key is not None:
	163	val = key(val)
	164
	165	# Assume the input are strings, which is the most common case
	166	try:
	167	return string_func(val)
	168	except (TypeError, AttributeError):
	169
	170	# If bytes type, use the bytes_func
	171	if type(val) in (bytes,):
	172	return bytes_func(val)
	173
	174	# Otherwise, assume it is an iterable that must be parsed recursively.
	175	# Do not apply the key recursively.
	176	try:
	177	return tuple(_natsort_key(
	178	x, None, string_func, bytes_func, num_func
	179	) for x in val)
	180
	181	# If that failed, it must be a number.
	182	except TypeError:
	183	return num_func(val)
	184
	185
	186	def _parse_bytes_factory(alg):
	187	"""Create a function that will format a bytes string in a tuple."""
	188	# We don't worry about ns.UNGROUPLETTERS \| ns.LOCALEALPHA because
	189	# bytes cannot be compared to strings.
	190	if alg & ns.PATH and alg & ns.IGNORECASE:
	191	return lambda x: ((x.lower(),),)
	192	elif alg & ns.PATH:
	193	return lambda x: ((x,),)
	194	elif alg & ns.IGNORECASE:
	195	return lambda x: (x.lower(),)
	196	else:
	197	return lambda x: (x,)
	198
	199
	200	def _parse_number_factory(alg, sep, pre_sep):
	201	"""Create a function that will properly format a number in a tuple."""
	202	nan_replace = float('+inf') if alg & ns.NANLAST else float('-inf')
	203
	204	def func(val, nan_replace=nan_replace, sep=sep):
	205	"""Given a number, place it in a tuple with a leading null string."""
	206	return sep, nan_replace if val != val else val
	207
	208	# Return the function, possibly wrapping in tuple if PATH is selected.
	209	if alg & ns.PATH and alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA:
	210	return lambda x: (((pre_sep,), func(x)),)
	211	elif alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA:
	212	return lambda x: ((pre_sep,), func(x))
	213	elif alg & ns.PATH:
	214	return lambda x: (func(x),)
	215	else:
	216	return func
	217
	218
	219	def _parse_string_factory(alg, sep, splitter,
	220	input_transform,
	221	component_transform,
	222	final_transform):
	223	"""Create a function that will properly split and format a string."""
	224	# Sometimes we store the "original" input before transformation,
	225	# sometimes after.
	226	orig_after_xfrm = not (alg & ns._DUMB and alg & ns.LOCALEALPHA)
	227	original_func = input_transform if orig_after_xfrm else _no_op
	228	normalize_input = _normalize_input_factory(alg)
	229
	230	def func(x):
	231	# Apply string input transformation function and return to x.
	232	# Original function is usually a no-op, but some algorithms require it
	233	# to also be the transformation function.
	234	x = normalize_input(x)
	235	x, original = input_transform(x), original_func(x)
	236	x = splitter(x) # Split string into components.
	237	x = py23_filter(None, x) # Remove empty strings.
	238	x = py23_map(component_transform, x) # Apply transform on components.
	239	x = _sep_inserter(x, sep) # Insert '' between numbers.
	240	return final_transform(x, original) # Apply the final transform.
	241
	242	return func
	243
	244
	245	def _parse_path_factory(str_split):
	246	"""Create a function that will properly split and format a path."""
	247	return lambda x: tuple(py23_map(str_split, _path_splitter(x)))
	248
	249
	250	def _sep_inserter(iterable, sep):
	251	"""Insert '' between numbers."""
	252
	253	# Get the first element. If StopIteration is raised, that's OK.
	254	# Since we are controlling the types of the input, 'type' is used
	255	# instead of 'isinstance' for the small speed advantage it offers.
	256	try:
	257	types = (int, float, long)
	258	first = next(iterable)
	259	if type(first) in types:
	260	yield sep
	261	yield first
	262
	263	# Now, check if pair of elements are both numbers. If so, add ''.
	264	second = next(iterable)
	265	if type(first) in types and type(second) in types:
	266	yield sep
	267	yield second
	268
	269	# Now repeat in a loop.
	270	for x in iterable:
	271	first, second = second, x
	272	if type(first) in types and type(second) in types:
	273	yield sep
	274	yield second
	275	except StopIteration:
	276	# Catch StopIteration per deprecation in PEP 479:
	277	# "Change StopIteration handling inside generators"
	278	return
	279
	280
	281	def _input_string_transform_factory(alg):
	282	"""
	283	Given a set of natsort algorithms, return the function to operate
	284	on the pre-split input string according to the user's request.
	285	"""
	286	# Shortcuts.
	287	lowfirst = alg & ns.LOWERCASEFIRST
	288	dumb = alg & ns._DUMB
	289
	290	# Build the chain of functions to execute in order.
	291	function_chain = []
	292	if (dumb and not lowfirst) or (lowfirst and not dumb):
	293	function_chain.append(methodcaller('swapcase'))
	294
	295	if alg & ns.IGNORECASE:
	296	if NEWPY:
	297	function_chain.append(methodcaller('casefold'))
	298	else:
	299	function_chain.append(methodcaller('lower'))
	300
	301	if alg & ns.LOCALENUM:
	302	# Create a regular expression that will remove thousands separators.
	303	strip_thousands = r'''
	304	(?<=[0-9]{{1}}) # At least 1 number
	305	(?<![0-9]{{4}}) # No more than 3 numbers
	306	{nodecimal} # Cannot follow decimal
	307	{thou} # The thousands separator
	308	(?=[0-9]{{3}} # Three numbers must follow
	309	([^0-9]\|$) # But a non-number after that
	310	)
	311	'''
	312	nodecimal = r''
	313	if alg & ns.FLOAT:
	314	# Make a regular expression component that will ensure no
	315	# separators are removed after a decimal point.
	316	d = get_decimal_point()
	317	d = r'\.' if d == r'.' else d
	318	nodecimal += r'(?<!' + d + r'[0-9])'
	319	nodecimal += r'(?<!' + d + r'[0-9]{2})'
	320	nodecimal += r'(?<!' + d + r'[0-9]{3})'
	321	strip_thousands = strip_thousands.format(thou=get_thousands_sep(),
	322	nodecimal=nodecimal)
	323	strip_thousands = re.compile(strip_thousands, flags=re.VERBOSE)
	324	function_chain.append(partial(strip_thousands.sub, ''))
	325
	326	# Create a regular expression that will change the decimal point to
	327	# a period if not already a period.
	328	decimal = get_decimal_point()
	329	if alg & ns.FLOAT and decimal != '.':
	330	switch_decimal = r'(?<=[0-9]){decimal}\|{decimal}(?=[0-9])'
	331	switch_decimal = switch_decimal.format(decimal=decimal)
	332	switch_decimal = re.compile(switch_decimal)
	333	function_chain.append(partial(switch_decimal.sub, '.'))
	334
	335	# Return the chained functions.
	336	return chain_functions(function_chain)
	337
	338
	339	def _string_component_transform_factory(alg):
	340	"""
	341	Given a set of natsort algorithms, return the function to operate
	342	on the post-split strings according to the user's request.
	343	"""
	344	# Shortcuts.
	345	use_locale = alg & ns.LOCALEALPHA
	346	dumb = alg & ns._DUMB
	347	group_letters = (alg & ns.GROUPLETTERS) or (use_locale and dumb)
	348	nan_val = float('+inf') if alg & ns.NANLAST else float('-inf')
	349
	350	# Build the chain of functions to execute in order.
	351	func_chain = []
	352	if group_letters:
	353	func_chain.append(_groupletters)
	354	if use_locale:
	355	func_chain.append(get_strxfrm())
	356	kwargs = {'key': chain_functions(func_chain)} if func_chain else {}
	357
	358	# Return the correct chained functions.
	359	if alg & ns.FLOAT:
	360	kwargs['nan'] = nan_val
	361	return partial(fast_float, **kwargs)
	362	else:
	363	return partial(fast_int, **kwargs)
	364
	365
	366	def _final_data_transform_factory(alg, sep, pre_sep):
	367	"""
	368	Given a set of natsort algorithms, return the function to operate
	369	on the post-parsed strings according to the user's request.
	370	"""
	371	if alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA:
	372	swap = alg & ns._DUMB and alg & ns.LOWERCASEFIRST
	373	transform = methodcaller('swapcase') if swap else _no_op
	374
	375	def func(split_val, val, transform=transform):
	376	"""
	377	Return a tuple with the first character of the first element
	378	of the return value as the first element, and the return value
	379	as the second element. This will be used to perform gross sorting
	380	by the first letter.
	381	"""
	382	split_val = tuple(split_val)
	383	if not split_val:
	384	return (), ()
	385	elif split_val[0] == sep:
	386	return (pre_sep,), split_val
	387	else:
	388	return (transform(val[0]),), split_val
	389	return func
	390	else:
	391	return lambda split_val, val: tuple(split_val)
	392
	393
	394	def _groupletters(x, _low=methodcaller('casefold' if NEWPY else 'lower')):
	395	"""Double all characters, making doubled letters lowercase."""
	396	return ''.join(ichain.from_iterable((_low(y), y) for y in x))
	397
	398
	399	def chain_functions(functions):
	400	"""
	401	Chain a list of single-argument functions together and return.
	402
	403	The functions are applied in list order, and the output of the
	404	previous functions is passed to the next function.
	405
	406	Parameters
	407	----------
	408	functions : list
	409	A list of single-argument functions to chain together.
	410
	411	Returns
	412	-------
	413	A single argument function.
	414
	415	Examples
	416	--------
	417	Chain several functions together!
	418
	419	>>> funcs = [lambda x: x * 4, len, lambda x: x + 5]
	420	>>> func = chain_functions(funcs)
	421	>>> func('hey')
	422	17
	423
	424	"""
	425	functions = list(functions)
	426	if not functions:
	427	return _no_op
	428	elif len(functions) == 1:
	429	return functions[0]
	430	else:
	431	# See https://stackoverflow.com/a/39123400/1399279
	432	return partial(reduce, lambda res, f: f(res), functions)
102	433
103	434
104	435	def _do_decoding(s, encoding):

109	440	raise
110	441	except (AttributeError, TypeError):
111	442	return s
	443
	444
	445	def _path_splitter(s, _d_match=re.compile(r'\.\d').match):
	446	"""Split a string into its path components. Assumes a string is a path."""
	447	# If a PathLib Object, use its functionality to perform the split.
	448	if has_pathlib and isinstance(s, PurePath):
	449	s = py23_str(s)
	450	path_parts = deque()
	451	p_appendleft = path_parts.appendleft
	452	# Continue splitting the path from the back until we have reached
	453	# '..' or '.', or until there is nothing left to split.
	454	path_location = s
	455	while path_location != os_curdir and path_location != os_pardir:
	456	parent_path = path_location
	457	path_location, child_path = path_split(parent_path)
	458	if path_location == parent_path:
	459	break
	460	p_appendleft(child_path)
	461
	462	# This last append is the base path.
	463	# Only append if the string is non-empty.
	464	if path_location:
	465	p_appendleft(path_location)
	466
	467	# Now, split off the file extensions using a similar method to above.
	468	# Continue splitting off file extensions until we reach a decimal number
	469	# or there are no more extensions.
	470	# We are not using built-in functionality of PathLib here because of
	471	# the recursive splitting up to a decimal.
	472	base = path_parts.pop()
	473	base_parts = deque()
	474	b_appendleft = base_parts.appendleft
	475	while True:
	476	front = base
	477	base, ext = path_splitext(front)
	478	if _d_match(ext) or not ext:
	479	# Reset base to before the split if the split is invalid.
	480	base = front
	481	break
	482	b_appendleft(ext)
	483	b_appendleft(base)
	484
	485	# Return the split parent paths and then the split basename.
	486	return ichain(path_parts, base_parts)
112	487
113	488
114	489	def _args_to_enum(**kwargs):

122	497	msg = "The 'number_type' argument is deprecated as of 3.5.0, "
123	498	msg += "please use 'alg=ns.FLOAT', 'alg=ns.INT', or 'alg=ns.VERSION'"
124	499	warn(msg, DeprecationWarning)
125		alg \|= (_ns['FLOAT'] * bool(kwargs['number_type'] is float))
126		alg \|= (_ns['INT'] * bool(kwargs['number_type'] in (int, None)))
127		alg \|= (_ns['SIGNED'] * (kwargs['number_type'] not in (float, None)))
	500	alg \|= (ns.FLOAT * bool(kwargs['number_type'] is float))
	501	alg \|= (ns.INT * bool(kwargs['number_type'] in (int, None)))
	502	alg \|= (ns.SIGNED * (kwargs['number_type'] not in (float, None)))
128	503	if 'signed' in kwargs and kwargs['signed'] is not None:
129	504	msg = "The 'signed' argument is deprecated as of 3.5.0, "
130	505	msg += "please use 'alg=ns.SIGNED'."
131	506	warn(msg, DeprecationWarning)
132		alg \|= (_ns['SIGNED'] * bool(kwargs['signed']))
	507	alg \|= (ns.SIGNED * bool(kwargs['signed']))
133	508	if 'exp' in kwargs and kwargs['exp'] is not None:
134	509	msg = "The 'exp' argument is deprecated as of 3.5.0, "
135	510	msg += "please use 'alg=ns.NOEXP'."
136	511	warn(msg, DeprecationWarning)
137		alg \|= (_ns['NOEXP'] * (not kwargs['exp']))
	512	alg \|= (ns.NOEXP * (not kwargs['exp']))
138	513	if 'as_path' in kwargs and kwargs['as_path'] is not None:
139	514	msg = "The 'as_path' argument is deprecated as of 3.5.0, "
140	515	msg += "please use 'alg=ns.PATH'."
141	516	warn(msg, DeprecationWarning)
142		alg \|= (_ns['PATH'] * kwargs['as_path'])
143		if 'py3_safe' in kwargs and kwargs['py3_safe'] is not None:
144		msg = "The 'py3_safe' argument is deprecated as of 3.5.0, "
145		msg += "please use 'alg=ns.TYPESAFE'."
146		warn(msg, DeprecationWarning)
147		alg \|= (_ns['TYPESAFE'] * kwargs['py3_safe'])
	517	alg \|= (ns.PATH * kwargs['as_path'])
148	518	return alg
149
150
151		def _number_extracter(s, regex, numconv, py3_safe, use_locale, group_letters):
152		"""Helper to separate the string input into numbers and strings."""
153		conv_check = (numconv, _conv_to_check[numconv])
154
155		# Split the input string by numbers.
156		# If the input is not a string, TypeError is raised.
157		s = regex.split(s)
158
159		# Now convert the numbers to numbers, and leave strings as strings.
160		# Take into account locale if needed, and group letters if needed.
161		# Remove empty strings from the list.
162		if use_locale:
163		s = [locale_convert(x, conv_check, group_letters) for x in s if x]
164		elif group_letters:
165		s = [grouper(x, conv_check) for x in s if x]
166		else:
167		s = [numconv(x) for x in s if x]
168
169		# If the list begins with a number, lead with an empty string.
170		# This is used to get around the "unorderable types" issue.
171		if not s: # Return empty list for empty results.
172		return []
173		elif conv_check[1](s[0], num_only=True):
174		s = [null_string if use_locale else ''] + s
175
176		# The _py3_safe function inserts "" between numbers in the list,
177		# and is used to get around "unorderable types" in complex cases.
178		# It is a separate function that needs to be requested specifically
179		# because it is expensive to call.
180		return _py3_safe(s, use_locale, conv_check[1]) if py3_safe else s
181
182
def _path_splitter(s, _d_match=re.compile(r'\.\d').match):
    """Split a path into its directory components and file extensions.

    Parameters
    ----------
    s : str or PurePath
        The path to split. A PurePath is converted with ``str`` first
        (only attempted when ``has_pathlib`` is true).
    _d_match : callable
        Internal. Matcher that recognizes a dot followed by a digit, so
        that the fractional part of a decimal number is not mistaken for
        a file extension.

    Returns
    -------
    out : list
        The parent path components in order, followed by the base name
        with each trailing extension split off as its own element.
        An empty path is returned as a one-element list holding it.

    """
    path_parts = []
    p_append = path_parts.append
    # Convert a pathlib PurePath object to a string.
    if has_pathlib and isinstance(s, PurePath):
        path_location = str(s)
    else:  # pragma: no cover
        path_location = s

    # Continue splitting the path from the back until we have reached
    # '..' or '.', or until there is nothing left to split.
    while path_location != curdir and path_location != pardir:
        parent_path = path_location
        path_location, child_path = split(parent_path)
        if path_location == parent_path:
            break
        p_append(child_path)

    # This last append is the base path.
    # Only append if the string is non-empty.
    if path_location:
        p_append(path_location)

    # An empty path produces no components at all; without this guard the
    # pop() below would raise IndexError. Hand the input back unsplit.
    if not path_parts:
        return [path_location]

    # We created this list in reversed order, so we now correct the order.
    path_parts.reverse()

    # Now, split off the file extensions using a similar method to above.
    # Continue splitting off file extensions until we reach a decimal number
    # or there are no more extensions.
    base = path_parts.pop()
    base_parts = []
    b_append = base_parts.append
    while True:
        front = base
        base, ext = splitext(front)
        if _d_match(ext) or not ext:
            # Reset base to before the split if the split is invalid.
            base = front
            break
        b_append(ext)
    b_append(base)
    base_parts.reverse()

    # Return the split parent paths and then the split basename.
    return path_parts + base_parts
229
230
231		def _py3_safe(parsed_list, use_locale, check):
232		"""Insert '' between two numbers."""
233		length = len(parsed_list)
234		if length < 2:
235		return parsed_list
236		else:
237		new_list = [parsed_list[0]]
238		nl_append = new_list.append
239		for before, after in py23_zip(islice(parsed_list, 0, length-1),
240		islice(parsed_list, 1, None)):
241		if check(before, num_only=True) and check(after, num_only=True):
242		nl_append(null_string if use_locale else '')
243		nl_append(after)
244		return new_list
245
246
def _fix_nan(ret, alg):
    """Replace every NaN in ``ret`` with an infinity and return a tuple.

    An element counts as NaN when ``isfloat(x, num_only=True)`` and
    ``isnan(x)`` are both true. It is replaced by ``+inf`` when the
    ``NANLAST`` flag is set in ``alg`` and by ``-inf`` otherwise.
    All other elements are passed through unchanged.
    """
    return tuple(
        (float('+inf') if alg & _ns['NANLAST'] else float('-inf'))
        if isfloat(item, num_only=True) and isnan(item)
        else item
        for item in ret
    )
259
260
def _natsort_key(val, key, alg):
    """\
    Key to sort strings and numbers naturally.

    It works by separating out the numbers from the strings. This function for
    internal use only. See the natsort_keygen documentation for details of each
    parameter.

    Parameters
    ----------
    val : {str, unicode}
    key : callable
    alg : ns enum

    Returns
    -------
    out : tuple
        The modified value with numbers extracted.

    Raises
    ------
    ValueError
        If `alg` cannot be combined with the ``ns`` flags, or if the
        locale's decimal separator is neither '.' nor ','.

    """

    # Convert the arguments to the proper input tuple
    try:
        use_locale = alg & _ns['LOCALE']
        inp_options = (alg & _NUMBER_ALGORITHMS,
                       localeconv()['decimal_point'] if use_locale else '.')
    except TypeError:
        msg = "_natsort_key: 'alg' argument must be from the enum 'ns'"
        raise ValueError(msg+', got {0}'.format(py23_str(alg)))

    # Get the proper regex and conversion function.
    try:
        regex, num_function = _regex_and_num_function_chooser[inp_options]
    except KeyError:  # pragma: no cover
        if inp_options[1] not in ('.', ','):  # pragma: no cover
            raise ValueError("_natsort_key: currently natsort only supports "
                             "the decimal separators '.' and ','. "
                             "Please file a bug report.")
        else:
            raise
    else:
        # Apply key if needed.
        if key is not None:
            val = key(val)

        # If this is a path, convert it.
        # An AttributeError or a TypeError is raised if not a string:
        # which one depends on the Python version (os.path.split raises
        # TypeError for non-path input on Python 3.6+), and bytes input
        # makes the str-pattern regex raise TypeError.
        split_as_path = False
        if alg & _ns['PATH']:
            try:
                val = _path_splitter(val)
            except (AttributeError, TypeError):
                pass
            else:
                # Record that this string was split as a path so that
                # we don't set PATH in the recursive call.
                split_as_path = True

        # Assume the input are strings, which is the most common case.
        # Apply the string modification if needed.
        orig_val = val
        try:
            lowfirst = alg & _ns['LOWERCASEFIRST']
            dumb = dumb_sort() if use_locale else False
            if use_locale and dumb and not lowfirst:
                val = val.swapcase()  # Compensate for bad locale lib.
            elif lowfirst and not (use_locale and dumb):
                val = val.swapcase()
            if alg & _ns['IGNORECASE']:
                val = val.casefold() if PY_VERSION >= 3.3 else val.lower()
            gl = alg & _ns['GROUPLETTERS']
            ret = tuple(_number_extracter(val,
                                          regex,
                                          num_function,
                                          alg & _ns['TYPESAFE'],
                                          use_locale,
                                          gl or (use_locale and dumb)))
            # Handle NaN.
            if any(isfloat(x, num_only=True) and isnan(x) for x in ret):
                ret = _fix_nan(ret, alg)
            # For UNGROUPLETTERS, so the high level grouping can occur
            # based on the first letter of the string.
            # Do no locale transformation of the characters.
            if use_locale and alg & _ns['UNGROUPLETTERS']:
                if not ret:
                    return (ret, ret)
                elif ret[0] == null_string:
                    return ((b'' if use_pyicu else '',), ret)
                elif dumb:
                    if lowfirst:
                        return ((orig_val[0].swapcase(),), ret)
                    else:
                        return ((orig_val[0],), ret)
                else:
                    return ((val[0],), ret)
            else:
                return ret
        except (TypeError, AttributeError):
            # Check if it is a bytes type, and if so return as a
            # one element tuple.
            if type(val) in (bytes,):
                return (val.lower(),) if alg & _ns['IGNORECASE'] else (val,)
            # If not strings, assume it is an iterable that must
            # be parsed recursively. Do not apply the key recursively.
            # If this string was split as a path, turn off 'PATH'.
            try:
                was_path = alg & _ns['PATH']
                newalg = alg & _ALL_BUT_PATH
                newalg |= (was_path * (not split_as_path))
                return tuple([_natsort_key(x, None, newalg) for x in val])
            # If there is still an error, it must be a number.
            # Return as-is, with a leading empty string.
            except TypeError:
                n = null_string if use_locale else ''
                if isfloat(val, num_only=True) and isnan(val):
                    val = _fix_nan([val], alg)[0]
                return ((n, val,),) if alg & _ns['PATH'] else (n, val,)

+61

-18

setup.cfg less more

	0	[bumpversion]
	1	current_version = 5.3.3
	2	commit = True
	3	tag = True
	4	tag_name = {new_version}
	5
	6	[metadata]
	7	author = Seth M. Morton
	8	author_email = drtuba78@gmail.com
	9	url = https://github.com/SethMMorton/natsort
	10	description = Simple yet flexible natural sorting in Python.
	11	long_description = file: README.rst
	12	license = MIT
	13	classifiers =
	14	Development Status :: 5 - Production/Stable
	15	Intended Audience :: Developers
	16	Intended Audience :: Science/Research
	17	Intended Audience :: System Administrators
	18	Intended Audience :: Information Technology
	19	Intended Audience :: Financial and Insurance Industry
	20	Operating System :: OS Independent
	21	License :: OSI Approved :: MIT License
	22	Natural Language :: English
	23	Programming Language :: Python :: 2
	24	Programming Language :: Python :: 2.6
	25	Programming Language :: Python :: 2.7
	26	Programming Language :: Python :: 3
	27	Programming Language :: Python :: 3.3
	28	Programming Language :: Python :: 3.4
	29	Programming Language :: Python :: 3.5
	30	Programming Language :: Python :: 3.6
	31	Topic :: Scientific/Engineering :: Information Analysis
	32	Topic :: Utilities
	33	Topic :: Text Processing
	34
0	35	[bdist_wheel]
1	36	universal = 1
2	37
3	38	[sdist]
4		formats = zip,gztar
	39	formats = gztar
5	40
6		[pytest]
	41	[bumpversion:file:setup.py]
	42
	43	[bumpversion:file:natsort/_version.py]
	44
	45	[bumpversion:file:docs/source/conf.py]
	46
	47	[bumpversion:file:docs/source/changelog.rst]
	48	search = X.X.X
	49	replace = {new_version}
	50
	51	[tool:pytest]
7	52	flakes-ignore =
8		natsort/compat/py23.py UndefinedName
9		natsort/__init__.py UnusedImport
10		natsort/compat/* UnusedImport
11		docs/source/conf.py ALL
12		test_natsort/test_natsort.py UnusedImport RedefinedWhileUnused
13		test_natsort/test_locale_help.py UnusedImport RedefinedWhileUnused
14		test_natsort/compat/* UnusedImport
15
16		pep8ignore =
17		natsort/ns_enum.py E126 E241 E123
18		test_natsort/test_natsort.py E501 E241 E221
19		test_natsort/test_utils.py E501 E241 E221
20		test_natsort/test_locale_help.py E501 E241 E221
21		test_natsort/test_main.py E501 E241 E221
22		test_natsort/profile_natsorted.py ALL
23		docs/source/conf.py ALL
	53	natsort/compat/py23.py UndefinedName
	54	natsort/__init__.py UnusedImport
	55	natsort/compat/* UnusedImport
	56	docs/source/conf.py ALL
	57	test_natsort/test_natsort.py UnusedImport RedefinedWhileUnused
	58	test_natsort/test_locale_help.py UnusedImport RedefinedWhileUnused
	59	test_natsort/compat/* UnusedImport
	60	pep8ignore =
	61	natsort/ns_enum.py E126 E241 E123 E221
	62	test_natsort/test_*.py E501 E241 E221
	63	test_natsort/test_natsort_keygen.py E501 E241 E221 E701
	64	test_natsort/profile_natsorted.py ALL
	65	docs/source/conf.py ALL
24	66
25	67	[flake8]
26	68	max-line-length = 160
27	69	ignore = E231,E302
	70

-98

setup.py less more

0	0	#! /usr/bin/env python
1	1
2		# Std. lib imports
3		import re
4		import sys
5		from os.path import join
6
7		# Non-std lib imports
8		from setuptools import setup
9		from setuptools.command.test import test as TestCommand
10
11
class PyTest(TestCommand):
    """Custom command to run pytest on all code."""

    def finalize_options(self):
        """Finalize the base options, then clear the test arguments."""
        TestCommand.finalize_options(self)
        self.test_args = []
        self.test_suite = True

    def run_tests(self):
        """Run the unit tests, module doctests and documentation doctests.

        The three pytest statuses are OR-ed together and used as the
        process exit status. The value returned from this method is not
        used by the calling command, so exiting is what lets a failing
        run be reported to the caller of ``setup.py test``.
        """
        # import here, cause outside the eggs aren't loaded
        import pytest
        err1 = pytest.main(['--cov', 'natsort',
                            '--cov-report', 'term-missing',
                            '--flakes',
                            '--pep8',
                            '-s',
                            # '--failed',
                            # '-v',
                            ])
        err2 = pytest.main(['--doctest-modules', 'natsort'])
        err3 = pytest.main(['README.rst',
                            'docs/source/intro.rst',
                            'docs/source/examples.rst'])
        sys.exit(err1 | err2 | err3)
36
37
38		# Read natsort/_version.py for the module version number
39		VERSIONFILE = join('natsort', '_version.py')
40		versionsearch = re.compile(r"^__version__ = ['\"]([^'\"]*)['\"]")
41		with open(VERSIONFILE, "rt") as fl:
42		for line in fl:
43		m = versionsearch.search(line)
44		if m:
45		VERSION = m.group(1)
46		break
47		else:
48		s = "Unable to locate version string in {0}"
49		raise RuntimeError(s.format(VERSIONFILE))
50
51		# Read in the documentation for the long_description
52		DESCRIPTION = 'Sort lists naturally'
53		try:
54		with open('README.rst') as fl:
55		LONG_DESCRIPTION = fl.read()
56		except IOError:
57		LONG_DESCRIPTION = DESCRIPTION
58
59		# The argparse module was introduced in python 2.7 or python 3.2
60		REQUIRES = 'argparse' if sys.version[:3] in ('2.6', '3.0', '3.1') else ''
61
62		# Testing needs pytest, and mock if less than python 3.3
63		TESTS_REQUIRE = ['pytest', 'pytest-pep8', 'pytest-flakes',
64		'pytest-cov', 'pytest-cache', 'hypothesis']
65
66		if (sys.version.startswith('2') or
67		(sys.version.startswith('3') and int(sys.version.split('.')[1]) < 3)):
68		TESTS_REQUIRE.append('mock')
69		if (sys.version.startswith('2') or
70		(sys.version.startswith('3') and int(sys.version.split('.')[1]) < 4)):
71		TESTS_REQUIRE.append('pathlib')
72
73		# The setup parameters
	2	from setuptools import setup, find_packages
74	3	setup(
75	4	name='natsort',
76		version=VERSION,
77		author='Seth M. Morton',
78		author_email='drtuba78@gmail.com',
79		url='https://github.com/SethMMorton/natsort',
80		license='MIT',
81		install_requires=REQUIRES,
82		packages=['natsort', 'natsort.compat'],
	5	version='5.3.3',
	6	packages=find_packages(),
	7	install_requires=["argparse; python_version < '2.7'"],
83	8	entry_points={'console_scripts': ['natsort = natsort.__main__:main']},
84		tests_require=TESTS_REQUIRE,
85		cmdclass={'test': PyTest},
86		description=DESCRIPTION,
87		long_description=LONG_DESCRIPTION,
88		classifiers=(
89		'Development Status :: 5 - Production/Stable',
90		'Intended Audience :: Developers',
91		'Intended Audience :: Science/Research',
92		'Intended Audience :: System Administrators',
93		'Intended Audience :: Information Technology',
94		'Operating System :: OS Independent',
95		'License :: OSI Approved :: MIT License',
96		'Natural Language :: English',
97		'Programming Language :: Python :: 2.6',
98		'Programming Language :: Python :: 2.7',
99		'Programming Language :: Python :: 3',
100		'Topic :: Scientific/Engineering :: Information Analysis',
101		'Topic :: Utilities',
102		)
	9	extras_require={
	10	'fast': ["fastnumbers >= 2.0.0; python_version > '2.6'"],
	11	'icu': ["PyICU >= 1.0.0"]
	12	}
103	13	)

-27

~~test_natsort/compat/hypothesis.py~~ less more

0		# -- coding: utf-8 --
1		from __future__ import (
2		print_function,
3		division,
4		unicode_literals,
5		absolute_import
6		)
7		import sys
8		import compat.mock
9
10		major_minor = sys.version_info[:2]
11
12		# Use hypothesis if not on python 2.6.
13		if major_minor != (2, 6):
14		use_hypothesis = True
15		from hypothesis import assume, given, example
16		from hypothesis.specifiers import (
17		integers_in_range,
18		integers_from,
19		sampled_from,
20		)
21		# Otherwise mock these imports, because hypothesis
22		# is incompatible with python 2.6.
23		else:
24		example = integers_in_range = integers_from = \
25		sampled_from = assume = given = compat.mock.MagicMock()
26		use_hypothesis = False

+12

-17

test_natsort/compat/locale.py less more

9	9	import locale
10	10
11	11	# Local imports
12		from natsort.locale_help import use_pyicu
13		from natsort.compat.py23 import py23_str
	12	from natsort.compat.py23 import py23_str, py23_unichr, py23_range
14	13
15	14
16	15	def load_locale(x):
17	16	""" Convenience to load a locale, trying ISO8859-1 first."""
18	17	try:
19	18	locale.setlocale(locale.LC_ALL, str('{0}.ISO8859-1'.format(x)))
20		except:
	19	except locale.Error:
21	20	locale.setlocale(locale.LC_ALL, str('{0}.UTF-8'.format(x)))
22	21
23	22	# Check if de_DE is installed.

27	26	except locale.Error:
28	27	has_locale_de_DE = False
29	28
30		# Make a function that will return the appropriate
31		# strxfrm for the current locale.
32		if use_pyicu:
33		from natsort.locale_help import get_pyicu_transform
34		from locale import getlocale
35
36		def get_strxfrm():
37		return get_pyicu_transform(getlocale())
38		else:
39		from natsort.locale_help import strxfrm
40
41		def get_strxfrm():
42		return strxfrm
43
44	29	# Depending on the python version, use lower or casefold
45	30	# to make a string lowercase.
46	31	try:
47	32	low = py23_str.casefold
48	33	except AttributeError:
49	34	low = py23_str.lower
	35
	36	# There are some unicode values that are known failures on BSD systems
	37	# that have nothing to do with natsort (a ValueError is raised by strxfrm).
	38	# Let's filter them out.
	39	try:
	40	bad_uni_chars = set(py23_unichr(x) for x in py23_range(0X10fefd,
	41	0X10ffff+1))
	42	except ValueError:
	43	# Narrow unicode build... no worries.
	44	bad_uni_chars = set()

+36

-98

test_natsort/profile_natsorted.py less more

4	4	"""
5	5	from __future__ import print_function
6	6	import cProfile
7		import random
8	7	import sys
9	8
10	9	sys.path.insert(0, '.')
11		from natsort import natsorted, index_natsorted
	10	from natsort import natsort_keygen, ns
12	11	from natsort.compat.py23 import py23_range
	12	import locale
	13	locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
	14
	15	# Samples to parse
	16	number = 14695498
	17	int_string = '43493'
	18	float_string = '-434.93e7'
	19	plain_string = 'hello world'
	20	fancy_string = '7abba9342fdab'
	21	a_path = '/p/Folder (1)/file (1).tar.gz'
	22	some_bytes = b'these are bytes'
	23	a_list = ['hello', 'goodbye', '74']
	24
	25	basic_key = natsort_keygen()
	26	real_key = natsort_keygen(alg=ns.REAL)
	27	path_key = natsort_keygen(alg=ns.PATH)
	28	locale_key = natsort_keygen(alg=ns.LOCALE)
13	29
14	30
15		# Sample lists to sort
16		nums = random.sample(py23_range(10000), 1000)
17		nstr = list(map(str, random.sample(py23_range(10000), 1000)))
18		astr = ['a'+x+'num' for x in map(str, random.sample(py23_range(10000), 1000))]
19		tstr = [['a'+x, 'a-'+x]
20		for x in map(str, random.sample(py23_range(10000), 1000))]
21		cstr = ['a'+x+'-'+x for x in map(str, random.sample(py23_range(10000), 1000))]
	31	def prof_time_to_generate():
	32	print('* Generate Plain Key *')
	33	for _ in py23_range(100000):
	34	natsort_keygen()
	35	cProfile.run('prof_time_to_generate()', sort='time')
22	36
23	37
24		def prof_nums(a):
25		print('* Basic Call, Numbers *')
26		for _ in py23_range(1000):
27		natsorted(a)
28		cProfile.run('prof_nums(nums)', sort='time')
29
30
31		def prof_num_str(a):
32		print('* Basic Call, Numbers as Strings *')
33		for _ in py23_range(1000):
34		natsorted(a)
35		cProfile.run('prof_num_str(nstr)', sort='time')
36
37
38		def prof_str(a):
39		print('* Basic Call, Strings *')
40		for _ in py23_range(1000):
41		natsorted(a)
42		cProfile.run('prof_str(astr)', sort='time')
43
44
45		def prof_str_index(a):
46		print('* Basic Index Call *')
47		for _ in py23_range(1000):
48		index_natsorted(a)
49		cProfile.run('prof_str_index(astr)', sort='time')
50
51
52		def prof_nested(a):
53		print('* Basic Call, Nested Strings *')
54		for _ in py23_range(1000):
55		natsorted(a)
56		cProfile.run('prof_nested(tstr)', sort='time')
57
58
59		def prof_str_noexp(a):
60		print('* No-Exp Call *')
61		for _ in py23_range(1000):
62		natsorted(a, exp=False)
63		cProfile.run('prof_str_noexp(astr)', sort='time')
64
65
66		def prof_str_unsigned(a):
67		print('* Unsigned Call *')
68		for _ in py23_range(1000):
69		natsorted(a, signed=False)
70		cProfile.run('prof_str_unsigned(astr)', sort='time')
71
72
73		def prof_str_unsigned_noexp(a):
74		print('* Unsigned No-Exp Call *')
75		for _ in py23_range(1000):
76		natsorted(a, signed=False, exp=False)
77		cProfile.run('prof_str_unsigned_noexp(astr)', sort='time')
78
79
80		def prof_str_asint(a):
81		print('* Int Call *')
82		for _ in py23_range(1000):
83		natsorted(a, number_type=int)
84		cProfile.run('prof_str_asint(astr)', sort='time')
85
86
87		def prof_str_asint_unsigned(a):
88		print('* Unsigned Int (Versions) Call *')
89		for _ in py23_range(1000):
90		natsorted(a, number_type=int, signed=False)
91		cProfile.run('prof_str_asint_unsigned(astr)', sort='time')
92
93
94		def prof_str_key(a):
95		print('* Basic Call With Key *')
96		for _ in py23_range(1000):
97		natsorted(a, key=lambda x: x.upper())
98		cProfile.run('prof_str_key(astr)', sort='time')
99
100
101		def prof_str_index_key(a):
102		print('* Basic Index Call With Key *')
103		for _ in py23_range(1000):
104		index_natsorted(a, key=lambda x: x.upper())
105		cProfile.run('prof_str_index_key(astr)', sort='time')
106
107
108		def prof_str_unorderable(a):
109		print('* Basic Index Call, "Unorderable" *')
110		for _ in py23_range(1000):
111		natsorted(a)
112		cProfile.run('prof_str_unorderable(cstr)', sort='time')
	38	def prof_parsing(a, msg, key=basic_key):
	39	print(msg)
	40	for _ in py23_range(100000):
	41	key(a)
	42	cProfile.run('prof_parsing(int_string, "* Basic Call, Int as String *")', sort='time')
	43	cProfile.run('prof_parsing(float_string, "* Basic Call, Float as String *")', sort='time')
	44	cProfile.run('prof_parsing(float_string, "* Real Call *", real_key)', sort='time')
	45	cProfile.run('prof_parsing(number, "* Basic Call, Number *")', sort='time')
	46	cProfile.run('prof_parsing(fancy_string, "* Basic Call, Mixed String *")', sort='time')
	47	cProfile.run('prof_parsing(some_bytes, "* Basic Call, Byte String *")', sort='time')
	48	cProfile.run('prof_parsing(a_path, "* Path Call *", path_key)', sort='time')
	49	cProfile.run('prof_parsing(a_list, "* Basic Call, Recursive *")', sort='time')
	50	cProfile.run('prof_parsing("434,930,000 dollars", "* Locale Call *", locale_key)', sort='time')

+403

-142

test_natsort/slow_splitters.py less more

2	2	from __future__ import unicode_literals
3	3
4	4	import unicodedata
5		from natsort.compat.py23 import PY_VERSION
	5	import collections
	6	import itertools
	7	import functools
	8	from natsort.unicode_numbers import decimals
	9	from natsort.compat.py23 import PY_VERSION, py23_zip
6	10
7	11	if PY_VERSION >= 3.0:
8	12	long = int
9	13
10
11		def int_splitter(x, signed, safe, sep):
	14	triple_none = None, None, None
	15	_sentinel = object()
	16	SplitElement = collections.namedtuple('SplitElement',
	17	['isnum', 'val', 'isuni'])
	18
	19
	20	def int_splitter(iterable, signed, sep):
12	21	"""Alternate (slow) method to split a string into numbers."""
13		if not x:
14		return []
15		all_digits = set('0123456789')
16		full_list, strings, nums = [], [], []
17		input_len = len(x)
18		for i, char in enumerate(x):
19		# If this character is a sign and the next is a number,
20		# start a new number.
21		if (i+1 < input_len and signed and
22		(char in '-+') and (x[i+1] in all_digits)):
23		# Reset any current string or number.
24		if strings:
25		full_list.append(''.join(strings))
26		if nums:
27		full_list.append(int(''.join(nums)))
28		strings = []
29		nums = [char]
30		# If this is a number, add to the number list.
31		elif char in all_digits:
32		nums.append(char)
33		# Reset any string.
34		if strings:
35		full_list.append(''.join(strings))
36		strings = []
37		# If this is a unicode digit, append directly to the full list.
38		elif char.isdigit():
39		# Reset any string or number.
40		if strings:
41		full_list.append(''.join(strings))
42		if nums:
43		full_list.append(int(''.join(nums)))
44		strings = []
45		nums = []
46		full_list.append(unicodedata.digit(char))
47		# Otherwise add to the string.
48		else:
49		strings.append(char)
50		# Reset any number.
51		if nums:
52		full_list.append(int(''.join(nums)))
53		nums = []
54		if nums:
55		full_list.append(int(''.join(nums)))
56		elif strings:
57		full_list.append(''.join(strings))
58		if safe:
59		full_list = sep_inserter(full_list, (int, long), sep)
60		if type(full_list[0]) in (int, long):
61		return [sep] + full_list
62		else:
63		return full_list
64
65
66		def float_splitter(x, signed, exp, safe, sep):
	22	iterable = unicodedata.normalize('NFD', iterable)
	23	split_by_decimal = itertools.groupby(iterable, lambda a: a.isdigit())
	24	split_by_decimal = refine_split_grouping(split_by_decimal)
	25	split = int_splitter_iter(split_by_decimal, signed)
	26	split = sep_inserter(split, sep)
	27	return tuple(add_leading_space_if_first_is_num(split, sep))
	28
	29
	30	def float_splitter(iterable, signed, exp, sep):
67	31	"""Alternate (slow) method to split a string into numbers."""
68		if not x:
69		return []
70		all_digits = set('0123456789')
71		full_list, strings, nums = [], [], []
72		input_len = len(x)
73		for i, char in enumerate(x):
74		# If this character is a sign and the next is a number,
75		# start a new number.
76		if (i+1 < input_len and
77		(signed or (i > 1 and exp and x[i-1] in 'eE' and
78		x[i-2] in all_digits)) and
79		(char in '-+') and (x[i+1] in all_digits)):
80		# Reset any current string or number.
81		if strings:
82		full_list.append(''.join(strings))
83		if nums and i > 0 and x[i-1] not in 'eE':
84		full_list.append(float(''.join(nums)))
85		nums = [char]
	32
	33	def number_tester(x):
	34	return x.isdecimal() or unicodedata.numeric(x, None) is not None
	35
	36	iterable = unicodedata.normalize('NFD', iterable)
	37	split_by_decimal = itertools.groupby(iterable, number_tester)
	38	split_by_decimal = peekable(refine_split_grouping(split_by_decimal))
	39	split = float_splitter_iter(split_by_decimal, signed, exp)
	40	split = sep_inserter(split, sep)
	41	return tuple(add_leading_space_if_first_is_num(split, sep))
	42
	43
	44	def refine_split_grouping(iterable):
	45	"""Combines lists into strings, and separates unicode numbers from ASCII"""
	46	for isnum, values in iterable:
	47	values = list(values)
	48	# Further refine numbers into unicode and ASCII numeric characters.
	49	if isnum:
	50	num_grouped = group_unicode_and_ascii_numbers(values)
	51	for isuni, num_values in num_grouped:
	52	# If unicode, return one character at a time.
	53	if isuni:
	54	for u in num_values:
	55	yield SplitElement(True, u, True)
	56	# If ASCII, combine into a single multicharacter number.
	57	else:
	58	val = ''.join(num_values)
	59	yield SplitElement(True, val, False)
	60
	61	else:
	62	# If non-numeric, combine into a single string.
	63	val = ''.join(values)
	64	yield SplitElement(False, val, False)
	65
	66
	67	def group_unicode_and_ascii_numbers(
	68	iterable, ascii_digits=frozenset(decimals + '0123456789')
	69	):
	70	"""
	71	Use groupby to group ASCII and unicode numeric characters.
	72	Assumes all input is already all numeric characters.
	73	"""
	74	return itertools.groupby(iterable, lambda a: a not in ascii_digits)
	75
	76
	77	def int_splitter_iter(iterable, signed):
	78	"""Split the input into integers and strings."""
	79	for isnum, val, isuni in iterable:
	80	if isuni:
	81	yield unicodedata.digit(val)
	82	elif isnum:
	83	yield int(val)
	84	elif signed:
	85	for x in try_to_read_signed_integer(iterable, val):
	86	yield int(''.join(x)) if isinstance(x, list) else x
	87	else:
	88	yield val
	89
	90
	91	def float_splitter_iter(iterable, signed, exp):
	92	"""Split the input into integers and other."""
	93	weird_check = ('-inf', '-infinity', '+inf', '+infinity',
	94	'inf', 'infinity', 'nan', '-nan', '+nan')
	95	try_to_read_float_correctly = [
	96	try_to_read_float,
	97	try_to_read_float_with_exp,
	98	functools.partial(try_to_read_signed_float_template,
	99	key=try_to_read_float),
	100	functools.partial(try_to_read_signed_float_template,
	101	key=try_to_read_float_with_exp),
	102	][signed * 2 + exp * 1] # Choose the appropriate converter function.
	103	for isnum, val, isuni in iterable:
	104	if isuni:
	105	yield unicodedata.numeric(val)
	106	else:
	107	for x in try_to_read_float_correctly(iterable, isnum, val):
	108	if isinstance(x, list):
	109	yield float(''.join(x))
	110	elif x.lower().strip(' \t\n\r\f\v') in weird_check:
	111	yield float(x)
	112	else:
	113	yield x
	114
	115
	116	def try_to_read_signed_integer(iterable, val):
	117	"""
	118	If the given string ends with +/-, attempt to return a signed int.
	119	Otherwise, return the string as-is.
	120	"""
	121	if val.endswith(('+', '-')):
	122	next_element = next(iterable, None)
	123
	124	# Last element, return as-is.
	125	if next_element is None:
	126	yield val
	127	return
	128
	129	# We know the next value in the sequence must be "isnum == True".
	130	# We just need to handle unicode or not.
	131	_, next_val, next_isuni = next_element
	132
	133	# If unicode, don't apply sign and just return the val as-is
	134	# and convert the unicode character.
	135	if next_isuni:
	136	yield val
	137	yield unicodedata.digit(next_val)
	138
	139	# If the val is only the sign, return only the number.
	140	elif val in ('-', '+'):
	141	yield [val, next_val]
	142
	143	# Otherwise, remove the sign from the val and apply it to the number,
	144	# returning both.
	145	else:
	146	yield val[:-1]
	147	yield [val[-1], next_val]
	148
	149	else:
	150	yield val
	151
	152
	153	def try_to_read_float(iterable, isnum, val):
	154	"""
	155	Try to read a string that matches num.num and return as a float.
	156	Otherwise return the input as found.
	157	"""
	158	# Extract what is coming next.
	159	next_isnum, next_val, next_isuni = iterable.peek(triple_none)
	160
	161	# If a non-number was given, we can only accept a decimal point.
	162	if not isnum:
	163
	164	# If the next value is None or not a non-uni number, return as-is.
	165	if next_val is None or not next_isnum or next_isuni:
	166	yield val
	167
	168	# If this the decimal point, add it to the number and return.
	169	elif val == '.':
	170	next(iterable) # To progress the iterator.
	171	yield [val, next_val]
	172
	173	# If the val ends with the decimal point, split the decimal point
	174	# off the end of the string then place it to the front of the
	175	# iterable so that we can use it later.
	176	elif val.endswith('.'):
	177	iterable.push(SplitElement(False, val[-1], False))
	178	yield val[:-1]
	179
	180	# Otherwise, just return the val and move on.
	181	else:
	182	yield val
	183
	184	# If a number, read the number then try to get the post-decimal part.
	185	else:
	186
	187	# If the next element is not '.', return now.
	188	if next_val != '.':
	189	# If the next val starts with a '.', let's add that.
	190	if next_val is not None and next_val.startswith('.'):
	191	next(iterable) # To progress the iterator.
	192	iterable.push(SplitElement(False, next_val[1:], False))
	193	yield [val, next_val[0]]
86	194	else:
87		nums.append(char)
88		strings = []
89		# If this is a number, add to the number list.
90		elif char in all_digits:
91		nums.append(char)
92		# Reset any string.
93		if strings:
94		full_list.append(''.join(strings))
95		strings = []
96		# If this is a decimal, add to the number list.
97		elif (i + 1 < input_len and char == '.' and x[i+1] in all_digits):
98		if nums and '.' in nums:
99		full_list.append(float(''.join(nums)))
100		nums = []
101		nums.append(char)
102		if strings:
103		full_list.append(''.join(strings))
104		strings = []
105		# If this is an exponent, add to the number list.
106		elif (i > 0 and i + 1 < input_len and exp and char in 'eE' and
107		x[i-1] in all_digits and x[i+1] in all_digits \| set('+-')):
108		if 'e' in nums or 'E' in nums:
109		strings = [char]
110		full_list.append(float(''.join(nums)))
111		nums = []
	195	yield [val]
	196
	197	# Recursively parse the decimal and after. If the returned
	198	# value is a list, add the list to the current number.
	199	# If not, just return the number with the decimal.
	200	else:
	201	# If the first value returned from the try_to_read_float
	202	# is a list, add it to the float component list.
	203	next(iterable) # To progress the iterator.
	204	ret = next(try_to_read_float(iterable, next_isnum, next_val))
	205	if isinstance(ret, list):
	206	yield [val] + ret
112	207	else:
113		nums.append(char)
114		# If this is a unicode digit, append directly to the full list.
115		elif unicodedata.numeric(char, None) is not None:
116		# Reset any string or number.
117		if strings:
118		full_list.append(''.join(strings))
119		if nums:
120		full_list.append(float(''.join(nums)))
121		strings = []
122		nums = []
123		full_list.append(unicodedata.numeric(char))
124		# Otherwise add to the string.
125		else:
126		strings.append(char)
127		# Reset any number.
128		if nums:
129		full_list.append(float(''.join(nums)))
130		nums = []
131		if nums:
132		full_list.append(float(''.join(nums)))
133		elif strings:
134		full_list.append(''.join(strings))
135		# Fix a float that looks like a string.
136		fstrings = ('inf', 'infinity', '-inf', '-infinity',
137		'+inf', '+infinity', 'nan')
138		full_list = [float(y) if type(y) != float and y.lower() in fstrings else y
139		for y in full_list]
140		if safe:
141		full_list = sep_inserter(full_list, (float,), sep)
142		if type(full_list[0]) == float:
143		return [sep] + full_list
144		else:
145		return full_list
146
147
148		def sep_inserter(x, t, sep):
149		# Simulates the py3_safe function.
150		ret = [x[0]]
151		for i, y in enumerate(x[1:]):
152		if type(y) in t and type(x[i]) in t:
153		ret.append(sep)
154		ret.append(y)
	208	yield [val, next_val]
	209
	210
	211	def try_to_read_float_with_exp(iterable, isnum, val):
	212	"""
	213	Try to read a string that matches num.numE[+-]num and return as a float.
	214	Otherwise return the input as found.
	215	"""
	216	exp_ident = ('e', 'E', 'e-', 'E-', 'e+', 'E+')
	217
	218	# Start by reading the floating point part.
	219	float_ret = next(try_to_read_float(iterable, isnum, val))
	220
	221	# Extract what is coming next.
	222	next_isnum, next_val, next_isuni = iterable.peek(triple_none)
	223
	224	# If the float part is not a list, or the next value
	225	# is not in the exponential identifier list, return it as-is.
	226	if not isinstance(float_ret, list) or next_val not in exp_ident:
	227	yield float_ret
	228
	229	# We know the next_val is an exponential identifier. See if the value
	230	# after that is a non-unicode number. If so, return all as a float.
	231	# If not, put the exponential identifier back on the front of the
	232	# list and return the float_ret as-is.
	233	else:
	234	exp = SplitElement(next_isnum, next_val, next_isuni)
	235	next(iterable) # To progress the iterator.
	236	next_isnum, next_val, next_isuni = iterable.peek(triple_none)
	237	if next_isnum and not next_isuni:
	238	next(iterable) # To progress the iterator.
	239	yield float_ret + [exp.val, next_val]
	240	else:
	241	iterable.push(exp)
	242	yield float_ret
	243
	244
	245	def try_to_read_signed_float_template(iterable, isnum, val, key):
	246	"""
	247	Try to read a string that matches [+-]num.numE[+-]num and return as a
	248	float. Otherwise return the input as found.
	249	"""
	250	# Extract what is coming next.
	251	next_isnum, next_val, next_isuni = iterable.peek(triple_none)
	252
	253	# If it looks like there is a sign here and the next value is a
	254	# non-unicode number, try to parse that with the sign.
	255	if val.endswith(('+', '-')) and next_isnum and not next_isuni:
	256
	257	# If this value is a sign, return the combo.
	258	if val in ('+', '-'):
	259	next(iterable) # To progress the iterator.
	260	yield [val] + next(key(iterable, next_isnum, next_val))
	261
	262	# If the val ends with the sign split the sign off the end of
	263	# the string then place it to the front of the iterable so that
	264	# we can use it later.
	265	else:
	266	iterable.push(SplitElement(False, val[-1], False))
	267	yield val[:-1]
	268
	269	# If it looks like there is a sign here and the next value is a
	270	# decimal, try to parse as a decimal.
	271	elif val.endswith(('+.', '-.')) and next_isnum and not next_isuni:
	272
	273	# Push back a zero before the decimal then parse.
	274	print(val, iterable.peek())
	275
	276	# If this value is a sign, return the combo
	277	if val[:-1] in ('+', '-'):
	278	yield [val[:-1]] + next(key(iterable, False, val[-1]))
	279
	280	# If the val ends with the sign split the decimal the end of
	281	# the string then place it to the front of the iterable so that
	282	# we can use it later.
	283	else:
	284	iterable.push(SplitElement(False, val[-2:], False))
	285	yield val[:-2]
	286
	287	# If no sign, pass directly to the key function.
	288	else:
	289	yield next(key(iterable, isnum, val))
	290
	291
	292	def add_leading_space_if_first_is_num(iterable, sep):
	293	"""Check if the first element is a number, and prepend with space if so."""
	294	z, peek = itertools.tee(iterable)
	295	if type(next(peek, None)) in (int, long, float):
	296	z = itertools.chain([sep], z)
	297	del peek
	298	return z
	299
	300
	301	def sep_inserter(iterable, sep, types=frozenset((int, long, float))):
	302	"""Simulates the py3_safe function."""
	303	pairs = pairwise(iterable)
	304
	305	# Prime loop by handling first pair specially.
	306	try:
	307	first, second = next(pairs)
	308	except StopIteration:
	309	return
	310	if second is None: # Only one element
	311	yield first
	312	elif type(first) in types and type(second) in types:
	313	yield first
	314	yield sep
	315	yield second
	316	else:
	317	yield first
	318	yield second
	319
	320	# Handle all remaining pairs in loop.
	321	for first, second in pairs:
	322	if type(first) in types and type(second) in types:
	323	yield sep
	324	yield second
	325
	326
	327	def pairwise(iterable):
	328	"s -> (s0,s1), (s1,s2), (s2,s3), ..."
	329	split1, split2 = itertools.tee(iterable)
	330	a, b = itertools.tee(split1)
	331	test1, test2 = itertools.tee(split2)
	332	next(b, None)
	333	if next(test1, None) is None:
	334	ret = py23_zip(a, b) # Returns empty list
	335	elif next(test2, None) is not None and next(test2, None) is None:
	336	ret = py23_zip(a, [None]) # Return at least one value
	337	else:
	338	ret = py23_zip(a, b)
	339	del test1, test2, split2
155	340	return ret
	341
	342
	343	class peekable(object):
	344	"""Wrapper for an iterator to allow 1-item lookahead
	345	Call ``peek()`` on the result to get the value that will next pop out of
	346	``next()``, without advancing the iterator:
	347	>>> p = peekable(xrange(2))
	348	>>> p.peek()
	349	0
	350	>>> p.next()
	351	0
	352	>>> p.peek()
	353	1
	354	>>> p.next()
	355	1
	356	Pass ``peek()`` a default value, and it will be returned in the case where
	357	the iterator is exhausted:
	358	>>> p = peekable([])
	359	>>> p.peek('hi')
	360	'hi'
	361	If no default is provided, ``peek()`` raises ``StopIteration`` when there
	362	are no items left.
	363	To test whether there are more items in the iterator, examine the
	364	peekable's truth value. If it is truthy, there are more items.
	365	>>> assert peekable(xrange(1))
	366	>>> assert not peekable([])
	367	"""
	368	# Lowercase to blend in with itertools. The fact that it's a class is an
	369	# implementation detail.
	370
	371	def __init__(self, iterable):
	372	self._it = iter(iterable)
	373
	374	def __iter__(self):
	375	return self
	376
	377	def __nonzero__(self):
	378	try:
	379	self.peek()
	380	except StopIteration:
	381	return False
	382	return True
	383
	384	__bool__ = __nonzero__
	385
	386	def peek(self, default=_sentinel):
	387	"""Return the item that will be next returned from ``next()``.
	388	Return ``default`` if there are no items left. If ``default`` is not
	389	provided, raise ``StopIteration``.
	390	"""
	391	if not hasattr(self, '_peek'):
	392	try:
	393	self._peek = next(self._it)
	394	except StopIteration:
	395	if default is _sentinel:
	396	raise
	397	return default
	398	return self._peek
	399
	400	def next(self):
	401	ret = self.peek()
	402	try:
	403	del self._peek
	404	except AttributeError:
	405	pass
	406	return ret
	407
	408	__next__ = next
	409
	410	def push(self, value):
	411	"""Put an element at the front of the iterable."""
	412	if hasattr(self, '_peek'):
	413	self._it = itertools.chain([value, self._peek], self._it)
	414	del self._peek
	415	else:
	416	self._it = itertools.chain([value], self._it)

-54

~~test_natsort/stress_natsort.py~~ less more

0		# -- coding: utf-8 --
1		"""\
2		This file contains functions to stress-test natsort, looking
3		for cases that raise an unknown exception.
4		"""
5		from random import randint, sample, choice
6		from string import printable
7		from copy import copy
8		from pytest import fail
9		from natsort import natsorted
10		from natsort.compat.py23 import py23_range
11
12
13		def test_random():
14		"""Try to sort 100,000 randomly generated strings without exception."""
15
16		# Repeat test 100,000 times
17		for _ in py23_range(100000):
18		# Made a list of five randomly generated strings
19		lst = [''.join(sample(printable, randint(7, 30)))
20		for __ in py23_range(5)]
21		# Try to sort. If there is an exception, give some detailed info.
22		try:
23		natsorted(lst)
24		except Exception as e:
25		msg = "Ended with exception type '{exc}: {msg}'.\n"
26		msg += "Failed on the input {lst}."
27		fail(msg.format(exc=type(e).__name__, msg=str(e), lst=str(lst)))
28
29
30		def test_similar():
31		"""Try to sort 100,000 randomly generated
32		similar strings without exception.
33		"""
34
35		# Repeat test 100,000 times
36		for _ in py23_range(100000):
37		# Create a randomly generated string
38		base = sample(printable, randint(7, 30))
39		# Make a list of strings based on this string,
40		# with some randomly generated modifications
41		lst = []
42		for __ in py23_range(5):
43		new_str = copy(base)
44		for ___ in py23_range(randint(1, 5)):
45		new_str[randint(0, len(base)-1)] = choice(printable)
46		lst.append(''.join(new_str))
47		# Try to sort. If there is an exception, give some detailed info.
48		try:
49		natsorted(lst)
50		except Exception as e:
51		msg = "Ended with exception type '{exc}: {msg}'.\n"
52		msg += "Failed on the input {lst}."
53		fail(msg.format(exc=type(e).__name__, msg=str(e), lst=str(lst)))

+59

-64

test_natsort/test_fake_fastnumbers.py less more

3	3	"""
4	4	from __future__ import unicode_literals
5	5
6		import pytest
7	6	import unicodedata
8	7	from math import isnan
9		from natsort.compat.py23 import py23_str
	8	from natsort.compat.py23 import PY_VERSION
10	9	from natsort.compat.fake_fastnumbers import (
11	10	fast_float,
12	11	fast_int,
13		isfloat,
14		isint,
15	12	)
16		from compat.hypothesis import (
17		assume,
	13	from hypothesis import (
18	14	given,
19		use_hypothesis,
20	15	)
	16	from hypothesis.strategies import (
	17	floats,
	18	integers,
	19	text,
	20	)
	21
	22	if PY_VERSION >= 3:
	23	long = int
21	24
22	25
23	26	def is_float(x):

34	37	return True
35	38
36	39
	40	def not_a_float(x):
	41	return not is_float(x)
	42
	43
37	44	def is_int(x):
38	45	try:
39		int(x)
40		except ValueError:
	46	return x.is_integer()
	47	except AttributeError:
41	48	try:
42		unicodedata.digit(x)
43		except (ValueError, TypeError):
44		return False
	49	long(x)
	50	except ValueError:
	51	try:
	52	unicodedata.digit(x)
	53	except (ValueError, TypeError):
	54	return False
	55	else:
	56	return True
45	57	else:
46	58	return True
47		else:
48		return True
	59
	60
	61	def not_an_int(x):
	62	return not is_int(x)
	63
49	64
50	65	# Each test has an "example" version for demonstrative purposes,
51	66	# and a test that uses the hypothesis module.
52	67
53	68
	69	def test_fast_float_returns_nan_alternate_if_nan_option_is_given():
	70	assert fast_float('nan', nan=7) == 7
	71
	72
54	73	def test_fast_float_converts_float_string_to_float_example():
55	74	assert fast_float('45.8') == 45.8
56	75	assert fast_float('-45') == -45.0
57		assert fast_float('45.8e-2') == 45.8e-2
	76	assert fast_float('45.8e-2', key=len) == 45.8e-2
58	77	assert isnan(fast_float('nan'))
	78	assert isnan(fast_float('+nan'))
	79	assert isnan(fast_float('-NaN'))
	80	assert fast_float('۱۲.۱۲') == 12.12
	81	assert fast_float('-۱۲.۱۲') == -12.12
59	82
60	83
61		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
62		@given(float)
	84	@given(floats(allow_nan=False))
63	85	def test_fast_float_converts_float_string_to_float(x):
64		assume(not isnan(x)) # But inf is included
65	86	assert fast_float(repr(x)) == x
66	87
67	88

69	90	assert fast_float('invalid') == 'invalid'
70	91
71	92
72		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
73		@given(py23_str)
	93	@given(text().filter(not_a_float).filter(bool))
74	94	def test_fast_float_leaves_string_as_is(x):
75		assume(not is_float(x))
76	95	assert fast_float(x) == x
	96
	97
	98	def test_fast_float_with_key_applies_to_string_example():
	99	assert fast_float('invalid', key=len) == len('invalid')
	100
	101
	102	@given(text().filter(not_a_float).filter(bool))
	103	def test_fast_float_with_key_applies_to_string(x):
	104	assert fast_float(x, key=len) == len(x)
77	105
78	106
79	107	def test_fast_int_leaves_float_string_as_is_example():

82	110	assert fast_int('inf') == 'inf'
83	111
84	112
85		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
86		@given(float)
	113	@given(floats().filter(not_an_int))
87	114	def test_fast_int_leaves_float_string_as_is(x):
88		assume(not x.is_integer())
89	115	assert fast_int(repr(x)) == repr(x)
90	116
91	117
92	118	def test_fast_int_converts_int_string_to_int_example():
93	119	assert fast_int('-45') == -45
94	120	assert fast_int('+45') == 45
	121	assert fast_int('۱۲') == 12
	122	assert fast_int('-۱۲') == -12
95	123
96	124
97		@given(int)
	125	@given(integers())
98	126	def test_fast_int_converts_int_string_to_int(x):
99	127	assert fast_int(repr(x)) == x
100	128

103	131	assert fast_int('invalid') == 'invalid'
104	132
105	133
106		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
107		@given(py23_str)
	134	@given(text().filter(not_an_int).filter(bool))
108	135	def test_fast_int_leaves_string_as_is(x):
109		assume(not is_int(x))
110	136	assert fast_int(x) == x
111	137
112	138
113		def test_isfloat_returns_True_for_real_numbers_example():
114		assert isfloat(-45.0)
115		assert isfloat(45.8e-2)
	139	def test_fast_int_with_key_applies_to_string_example():
	140	assert fast_int('invalid', key=len) == len('invalid')
116	141
117	142
118		@given(float)
119		def test_isfloat_returns_True_for_real_numbers(x):
120		assert isfloat(x)
121
122
123		def test_isfloat_returns_False_for_strings_example():
124		assert not isfloat('45.8')
125		assert not isfloat('invalid')
126
127
128		@given(py23_str)
129		def test_isfloat_returns_False_for_strings(x):
130		assert not isfloat(x)
131
132
133		def test_isint_returns_True_for_real_numbers_example():
134		assert isint(-45)
135		assert isint(45)
136
137
138		@given(int)
139		def test_isint_returns_True_for_real_numbers(x):
140		assert isint(x)
141
142
143		def test_isint_returns_False_for_strings_example():
144		assert not isint('45')
145		assert not isint('invalid')
146
147
148		@given(py23_str)
149		def test_isint_returns_False_for_strings(x):
150		assert not isint(x)
	143	@given(text().filter(not_an_int).filter(bool))
	144	def test_fast_int_with_key_applies_to_string(x):
	145	assert fast_int(x, key=len) == len(x)

+57

-0

test_natsort/test_final_data_transform_factory.py less more

	0	# -- coding: utf-8 --
	1	"""These test the utils.py functions."""
	2	from __future__ import unicode_literals
	3
	4	from natsort.ns_enum import ns
	5	from natsort.utils import _final_data_transform_factory
	6	from natsort.compat.py23 import py23_str
	7	from hypothesis import (
	8	given,
	9	)
	10	from hypothesis.strategies import (
	11	text,
	12	floats,
	13	integers,
	14	)
	15
	16
	17	# Each test has an "example" version for demonstrative purposes,
	18	# and a test that uses the hypothesis module.
	19
	20
	21	def test_final_data_transform_factory_with_iterable_returns_tuple_with_no_options_example():
	22	assert _final_data_transform_factory(0, '', '')(iter([7]), '') == (7,)
	23
	24
	25	@given(text())
	26	def test_final_data_transform_factory_with_iterable_returns_tuple_with_no_options(x):
	27	assert _final_data_transform_factory(0, '', '')(iter([x]), '') == (x,)
	28	# UNGROUPLETTERS without LOCALE does nothing, as does LOCALE without UNGROUPLETTERS
	29	assert _final_data_transform_factory(ns.UNGROUPLETTERS, '', '')(iter([x]), '') == _final_data_transform_factory(0, '', '')(iter([x]), '')
	30	assert _final_data_transform_factory(ns.LOCALE, '', '')(iter([x]), '') == _final_data_transform_factory(0, '', '')(iter([x]), '')
	31
	32
	33	def test_final_data_transform_factory_with_empty_tuple_returns_double_empty_tuple():
	34	assert _final_data_transform_factory(ns.LOCALE \| ns.UNGROUPLETTERS, '', '')((), '') == ((), ())
	35
	36
	37	def test_final_data_transform_factory_with_null_string_first_element_adds_empty_string_on_first_tuple_element():
	38	assert _final_data_transform_factory(ns.LOCALE \| ns.UNGROUPLETTERS, '', 'xx')(('', 60), '') == (('xx',), ('', 60))
	39
	40
	41	def test_final_data_transform_factory_returns_first_element_in_first_tuple_element_example():
	42	assert _final_data_transform_factory(ns.LOCALE \| ns.UNGROUPLETTERS, '', '')(('this', 60), 'this60') == (('t',), ('this', 60))
	43
	44
	45	@given(x=text().filter(bool), y=floats(allow_nan=False, allow_infinity=False) \| integers())
	46	def test_final_data_transform_factory_returns_first_element_in_first_tuple_element(x, y):
	47	assert _final_data_transform_factory(ns.LOCALE \| ns.UNGROUPLETTERS, '', '')((x, y), ''.join(map(py23_str, [x, y]))) == ((x[0],), (x, y))
	48
	49
	50	def test_final_data_transform_factory_returns_first_element_in_first_tuple_element_caseswapped_with_DUMB_and_LOWERCASEFIRST_example():
	51	assert _final_data_transform_factory(ns.LOCALE \| ns.UNGROUPLETTERS \| ns._DUMB \| ns.LOWERCASEFIRST, '', '')(('this', 60), 'this60') == (('T',), ('this', 60))
	52
	53
	54	@given(x=text().filter(bool), y=floats(allow_nan=False, allow_infinity=False) \| integers())
	55	def test_final_data_transform_factory_returns_first_element_in_first_tuple_element_caseswapped_with_DUMB_and_LOWERCASEFIRST(x, y):
	56	assert _final_data_transform_factory(ns.LOCALE \| ns.UNGROUPLETTERS \| ns._DUMB \| ns.LOWERCASEFIRST, '', '')((x, y), ''.join(map(py23_str, [x, y]))) == ((x[0].swapcase(),), (x, y))

+183

-0

test_natsort/test_input_string_transform_factory.py less more

	0	# -- coding: utf-8 --
	1	"""These test the utils.py functions."""
	2	from __future__ import unicode_literals
	3
	4	import pytest
	5	import locale
	6	from operator import methodcaller
	7	from natsort.ns_enum import ns
	8	from natsort.utils import _input_string_transform_factory
	9	from natsort.compat.py23 import NEWPY
	10	from compat.locale import (
	11	load_locale,
	12	has_locale_de_DE,
	13	)
	14	from hypothesis import (
	15	given,
	16	)
	17	from hypothesis.strategies import (
	18	text,
	19	integers,
	20	lists,
	21	)
	22
	23
	24	# Each test has an "example" version for demonstrative purposes,
	25	# and a test that uses the hypothesis module.
	26
	27
	28	def test_input_string_transform_factory_is_no_op_for_no_alg_options_examples():
	29	x = 'feijGGAd'
	30	assert _input_string_transform_factory(0)(x) is x
	31
	32
	33	@given(text())
	34	def test_input_string_transform_factory_is_no_op_for_no_alg_options(x):
	35	assert _input_string_transform_factory(0)(x) is x
	36
	37
	38	def test_input_string_transform_factory_performs_casefold_with_IGNORECASE_examples():
	39	x = 'feijGGAd'
	40	if NEWPY:
	41	assert _input_string_transform_factory(ns.IGNORECASE)(x) == x.casefold()
	42	else:
	43	assert _input_string_transform_factory(ns.IGNORECASE)(x) == x.lower()
	44
	45
	46	@given(text())
	47	def test_input_string_transform_factory_performs_casefold_with_IGNORECASE(x):
	48	if NEWPY:
	49	assert _input_string_transform_factory(ns.IGNORECASE)(x) == x.casefold()
	50	else:
	51	assert _input_string_transform_factory(ns.IGNORECASE)(x) == x.lower()
	52
	53
	54	def test_input_string_transform_factory_performs_swapcase_with_DUMB_examples():
	55	x = 'feijGGAd'
	56	assert _input_string_transform_factory(ns._DUMB)(x) == x.swapcase()
	57
	58
	59	@given(text())
	60	def test_input_string_transform_factory_performs_swapcase_with_DUMB(x):
	61	assert _input_string_transform_factory(ns._DUMB)(x) == x.swapcase()
	62
	63
	64	def test_input_string_transform_factory_performs_swapcase_with_LOWERCASEFIRST_example():
	65	x = 'feijGGAd'
	66	assert _input_string_transform_factory(ns.LOWERCASEFIRST)(x) == x.swapcase()
	67
	68
	69	@given(text())
	70	def test_input_string_transform_factory_performs_swapcase_with_LOWERCASEFIRST(x):
	71	x = 'feijGGAd'
	72	assert _input_string_transform_factory(ns.LOWERCASEFIRST)(x) == x.swapcase()
	73
	74
	75	def test_input_string_transform_factory_is_no_op_with_both_LOWERCASEFIRST_AND_DUMB_example():
	76	x = 'feijGGAd'
	77	assert _input_string_transform_factory(ns._DUMB \| ns.LOWERCASEFIRST)(x) is x
	78
	79
	80	@given(text())
	81	def test_input_string_transform_factory_is_no_op_with_both_LOWERCASEFIRST_AND_DUMB(x):
	82	assert _input_string_transform_factory(ns._DUMB \| ns.LOWERCASEFIRST)(x) is x
	83
	84
	85	def test_input_string_transform_factory_performs_swapcase_and_casefold_both_LOWERCASEFIRST_AND_IGNORECASE_example():
	86	x = 'feijGGAd'
	87	if NEWPY:
	88	assert _input_string_transform_factory(ns.IGNORECASE \| ns.LOWERCASEFIRST)(x) == x.swapcase().casefold()
	89	else:
	90	assert _input_string_transform_factory(ns.IGNORECASE \| ns.LOWERCASEFIRST)(x) == x.swapcase().lower()
	91
	92
	93	@given(text())
	94	def test_input_string_transform_factory_performs_swapcase_and_casefold_both_LOWERCASEFIRST_AND_IGNORECASE(x):
	95	if NEWPY:
	96	assert _input_string_transform_factory(ns.IGNORECASE \| ns.LOWERCASEFIRST)(x) == x.swapcase().casefold()
	97	else:
	98	assert _input_string_transform_factory(ns.IGNORECASE \| ns.LOWERCASEFIRST)(x) == x.swapcase().lower()
	99
	100
	101	def test_input_string_transform_factory_removes_thousands_separator_with_LOCALE_example():
	102	load_locale('en_US')
	103	x = '12,543,642,642.534,534,980' # Without FLOAT it does not account for decimal.
	104	assert _input_string_transform_factory(ns.LOCALE)(x) == '12543642642.534534980'
	105	x = '12,543,642,642.534,534,980' # LOCALEALPHA doesn't do anything... need LOCALENUM
	106	assert _input_string_transform_factory(ns.LOCALEALPHA)(x) == '12,543,642,642.534,534,980'
	107	locale.setlocale(locale.LC_ALL, str(''))
	108
	109
	110	@given(lists(elements=integers(), min_size=4, max_size=20))
	111	def test_input_string_transform_factory_removes_thousands_separator_with_LOCALE(x):
	112	load_locale('en_US')
	113	t = ''.join(map(methodcaller('rstrip', 'lL'), map(str, map(abs, x)))) # Remove negative signs trailing L
	114	s = ''
	115	for i, y in enumerate(reversed(t), 1):
	116	s = y + s
	117	if i % 3 == 0 and i != len(t):
	118	s = ',' + s
	119	assert _input_string_transform_factory(ns.LOCALE)(s) == t
	120	locale.setlocale(locale.LC_ALL, str(''))
	121
	122
	123	def test_input_string_transform_factory_removes_thousands_separator_and_is_float_aware_with_LOCALE_and_FLOAT_example():
	124	x = '12,543,642,642.534,534,980'
	125	assert _input_string_transform_factory(ns.LOCALE \| ns.FLOAT)(x) == '12543642642.534,534980'
	126
	127
	128	@given(lists(elements=integers(), min_size=4, max_size=20), lists(elements=integers(), min_size=4, max_size=20))
	129	def test_input_string_transform_factory_removes_thousands_separator_and_is_float_aware_with_LOCALE_and_FLOAT(x, y):
	130	load_locale('en_US')
	131	t = ''.join(map(methodcaller('rstrip', 'lL'), map(str, map(abs, x)))) # Remove negative signs trailing L
	132	s = ''
	133	for i, z in enumerate(reversed(t), 1):
	134	s = z + s
	135	if i % 3 == 0 and i != len(t):
	136	s = ',' + s
	137	u = ''.join(map(methodcaller('rstrip', 'lL'), map(str, map(abs, y)))) # Remove negative signs trailing L
	138	v = ''
	139	for i, z in enumerate(reversed(u), 1):
	140	v = z + v
	141	if i % 3 == 0 and i != len(u):
	142	v = ',' + v
	143	# Remove all but first comma.
	144	a = v.split(',', 1)
	145	p = a[0] + ',' + a[1].replace(',', '')
	146	assert _input_string_transform_factory(ns.LOCALE)('.'.join([s, v])) == '.'.join([t, u])
	147	assert _input_string_transform_factory(ns.LOCALE \| ns.FLOAT)('.'.join([s, v])) == '.'.join([t, p])
	148	locale.setlocale(locale.LC_ALL, str(''))
	149
	150
	151	# These might be too much to test with hypothesis.
	152
	153
	154	def test_input_string_transform_factory_leaves_invalid_thousands_separator_with_LOCALE_example():
	155	load_locale('en_US')
	156	x = '12,543,642642.5345,34980'
	157	assert _input_string_transform_factory(ns.LOCALE)(x) == '12543,642642.5345,34980'
	158	x = '12,59443,642,642.53,4534980'
	159	assert _input_string_transform_factory(ns.LOCALE)(x) == '12,59443,642642.53,4534980'
	160	x = '12543,642,642.5,34534980'
	161	assert _input_string_transform_factory(ns.LOCALE)(x) == '12543,642642.5,34534980'
	162	locale.setlocale(locale.LC_ALL, str(''))
	163
	164
	165	# @pytest.mark.skipif(not has_locale_de_DE or dumb_sort(), reason='requires de_DE locale and working locale')
	166	@pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale and working locale')
	167	def test_input_string_transform_factory_replaces_decimal_separator_with_LOCALE_example():
	168	load_locale('de_DE')
	169	x = '1543,753'
	170	assert _input_string_transform_factory(ns.LOCALE)(x) == '1543,753' # Does nothing without FLOAT
	171	assert _input_string_transform_factory(ns.LOCALE \| ns.FLOAT)(x) == '1543.753'
	172	assert _input_string_transform_factory(ns.LOCALEALPHA)(x) == '1543,753' # LOCALEALPHA doesn't do anything... need LOCALENUM
	173	locale.setlocale(locale.LC_ALL, str(''))
	174
	175
	176	# @pytest.mark.skipif(not has_locale_de_DE or dumb_sort(), reason='requires de_DE locale and working locale')
	177	@pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale and working locale')
	178	def test_input_string_transform_factory_does_not_replace_invalid_decimal_separator_with_LOCALE_example():
	179	load_locale('de_DE')
	180	x = '154s,t53'
	181	assert _input_string_transform_factory(ns.LOCALE \| ns.FLOAT)(x) == '154s,t53'
	182	locale.setlocale(locale.LC_ALL, str(''))

-122

~~test_natsort/test_locale_help.py~~ less more

0		# -- coding: utf-8 --
1		"""\
2		Test the locale help module module.
3		"""
4		from __future__ import unicode_literals
5
6		import locale
7		import pytest
8		from math import isnan
9		from itertools import chain
10		from natsort.compat.fake_fastnumbers import fast_float, isfloat
11		from natsort.locale_help import grouper, locale_convert
12		from natsort.compat.py23 import py23_str
13		from natsort.compat.locale import use_pyicu
14		from compat.locale import (
15		load_locale,
16		has_locale_de_DE,
17		get_strxfrm,
18		low,
19		)
20		from compat.hypothesis import (
21		assume,
22		given,
23		use_hypothesis,
24		)
25
26
27		# Each test has an "example" version for demonstrative purposes,
28		# and a test that uses the hypothesis module.
29
30
31		def test_grouper_returns_letters_with_lowercase_transform_of_letter_example():
32		assert grouper('HELLO', (fast_float, isfloat)) == 'hHeElLlLoO'
33		assert grouper('hello', (fast_float, isfloat)) == 'hheelllloo'
34
35
36		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
37		@given(py23_str)
38		def test_grouper_returns_letters_with_lowercase_transform_of_letter(x):
39		assume(type(fast_float(x)) is not float)
40		assert grouper(x, (fast_float, isfloat)) == ''.join(chain.from_iterable([low(y), y] for y in x))
41
42
43		def test_grouper_returns_float_string_as_float_example():
44		assert grouper('45.8e-2', (fast_float, isfloat)) == 45.8e-2
45
46
47		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
48		@given(float)
49		def test_grouper_returns_float_string_as_float(x):
50		assume(not isnan(x))
51		assert grouper(repr(x), (fast_float, isfloat)) == x
52
53
54		def test_locale_convert_transforms_float_string_to_float_example():
55		load_locale('en_US')
56		assert locale_convert('45.8', (fast_float, isfloat), False) == 45.8
57		locale.setlocale(locale.LC_NUMERIC, str(''))
58
59
60		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
61		@given(float)
62		def test_locale_convert_transforms_float_string_to_float(x):
63		assume(not isnan(x))
64		load_locale('en_US')
65		assert locale_convert(repr(x), (fast_float, isfloat), False) == x
66		locale.setlocale(locale.LC_NUMERIC, str(''))
67
68
69		def test_locale_convert_transforms_nonfloat_string_to_strxfrm_string_example():
70		load_locale('en_US')
71		strxfrm = get_strxfrm()
72		assert locale_convert('45,8', (fast_float, isfloat), False) == strxfrm('45,8')
73		assert locale_convert('hello', (fast_float, isfloat), False) == strxfrm('hello')
74		locale.setlocale(locale.LC_NUMERIC, str(''))
75
76
77		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
78		@given(py23_str)
79		def test_locale_convert_transforms_nonfloat_string_to_strxfrm_string(x):
80		assume(type(fast_float(x)) is not float)
81		load_locale('en_US')
82		strxfrm = get_strxfrm()
83		assert locale_convert(x, (fast_float, isfloat), False) == strxfrm(x)
84		locale.setlocale(locale.LC_NUMERIC, str(''))
85
86
87		def test_locale_convert_with_groupletters_transforms_nonfloat_string_to_strxfrm_string_with_grouped_letters_example():
88		load_locale('en_US')
89		strxfrm = get_strxfrm()
90		assert locale_convert('hello', (fast_float, isfloat), True) == strxfrm('hheelllloo')
91		assert locale_convert('45,8', (fast_float, isfloat), True) == strxfrm('4455,,88')
92		locale.setlocale(locale.LC_NUMERIC, str(''))
93
94
95		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
96		@given(py23_str)
97		def test_locale_convert_with_groupletters_transforms_nonfloat_string_to_strxfrm_string_with_grouped_letters(x):
98		assume(type(fast_float(x)) is not float)
99		load_locale('en_US')
100		strxfrm = get_strxfrm()
101		assert locale_convert(x, (fast_float, isfloat), True) == strxfrm(''.join(chain.from_iterable([low(y), y] for y in x)))
102		locale.setlocale(locale.LC_NUMERIC, str(''))
103
104
105		@pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale')
106		def test_locale_convert_transforms_float_string_to_float_with_de_locale_example():
107		load_locale('de_DE')
108		assert locale_convert('45.8', (fast_float, isfloat), False) == 45.8
109		assert locale_convert('45,8', (fast_float, isfloat), False) == 45.8
110		locale.setlocale(locale.LC_NUMERIC, str(''))
111
112
113		@pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale')
114		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
115		@given(float)
116		def test_locale_convert_transforms_float_string_to_float_with_de_locale(x):
117		assume(not isnan(x))
118		load_locale('de_DE')
119		assert locale_convert(repr(x), (fast_float, isfloat), False) == x
120		assert locale_convert(repr(x).replace('.', ','), (fast_float, isfloat), False) == x
121		locale.setlocale(locale.LC_NUMERIC, str(''))

+38

-81

test_natsort/test_main.py less more

2	2	Test the natsort command-line tool functions.
3	3	"""
4	4	from __future__ import print_function, unicode_literals
5		import pytest
6	5	import re
7	6	import sys
8	7	from pytest import raises
9	8	from compat.mock import patch, call
10		from compat.hypothesis import (
11		assume,
	9	from hypothesis import (
12	10	given,
13		integers_from,
14		integers_in_range,
15		sampled_from,
16		use_hypothesis,
	11	)
	12	from hypothesis.strategies import (
	13	integers,
	14	floats,
	15	lists,
	16	data,
17	17	)
18	18	from natsort.__main__ import (
19	19	main,

22	22	keep_entry_range,
23	23	exclude_entry,
24	24	sort_and_print_entries,
25		py23_str,
26	25	)
27	26
28	27

65	64
66	65	class Args:
67	66	"""A dummy class to simulate the argparse Namespace object"""
68		def __init__(self, filter, reverse_filter, exclude, as_path, reverse):
69		self.filter = filter
	67	def __init__(self, filt, reverse_filter, exclude, as_path, reverse):
	68	self.filter = filt
70	69	self.reverse_filter = reverse_filter
71	70	self.exclude = exclude
72	71	self.reverse = reverse

170	169	assert range_check(6.4, 30) == (6.4, 30.0)
171	170
172	171
173		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
174		@given(x=int, y=int)
175		def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second(x, y):
176		assume(x < y)
177		assert range_check(x, y) == (float(x), float(y))
178
179
180		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
181		@given(x=float, y=float)
182		def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second2(x, y):
183		assume(x < y)
	172	@given(x=integers(), data=data()) # Defer data selection for y till test is run.
	173	def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second(x, data):
	174	# Pull data such that the first is less than the second.
	175	y = data.draw(integers(min_value=x + 1))
	176	assert range_check(x, y) == (x, y)
	177
	178
	179	@given(x=floats(allow_nan=False, min_value=-1E8, max_value=1E8), data=data()) # Defer data selection for y till test is run.
	180	def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second2(x, data):
	181	# Pull data such that the first is less than the second.
	182	y = data.draw(floats(min_value=x + 1.0, max_value=1E9, allow_nan=False))
184	183	assert range_check(x, y) == (x, y)
185	184
186	185

190	189	assert str(err.value) == 'low >= high'
191	190
192	191
193		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
194		@given(x=float, y=float)
195		def test_range_check_raises_ValueError_if_second_is_less_than_first(x, y):
196		assume(x >= y)
197		with raises(ValueError) as err:
198		range_check(x, x)
	192	@given(x=floats(allow_nan=False), data=data()) # Defer data selection for y till test is run.
	193	def test_range_check_raises_ValueError_if_second_is_less_than_first(x, data):
	194	# Pull data such that the first is greater than or equal to the second.
	195	y = data.draw(floats(max_value=x, allow_nan=False))
	196	with raises(ValueError) as err:
	197	range_check(x, y)
199	198	assert str(err.value) == 'low >= high'
200	199
201	200

205	204	assert check_filter(None) is None
206	205
207	206
208		def test_check_filter_converts_filter_numbers_to_floats_if_filter_is_valid_example():
209		assert check_filter([(6, 7)]) == [(6.0, 7.0)]
210		assert check_filter([(6, 7), (2, 8)]) == [(6.0, 7.0), (2.0, 8.0)]
211
212
213		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
214		@given(x=(int, int, float, float), y=(int, float, float, int))
215		def test_check_filter_converts_filter_numbers_to_floats_if_filter_is_valid(x, y):
216		assume(all(i < j for i, j in zip(x, y)))
217		assert check_filter(list(zip(x, y))) == [(float(i), float(j)) for i, j in zip(x, y)]
	207	def test_check_filter_returns_input_as_is_if_filter_is_valid_example():
	208	assert check_filter([(6, 7)]) == [(6, 7)]
	209	assert check_filter([(6, 7), (2, 8)]) == [(6, 7), (2, 8)]
	210
	211
	212	@given(x=lists(integers(), min_size=1), data=data()) # Defer data selection for y till test is run.
	213	def test_check_filter_returns_input_as_is_if_filter_is_valid(x, data):
	214	y = [data.draw(integers(min_value=val + 1)) for val in x] # ensure y is element-wise greater than x
	215	assert check_filter(list(zip(x, y))) == [(i, j) for i, j in zip(x, y)]
218	216
219	217
220	218	def test_check_filter_raises_ValueError_if_filter_is_invalid_example():

223	221	assert str(err.value) == 'Error in --filter: low >= high'
224	222
225	223
226		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
227		@given(x=(int, int, float, float), y=(int, float, float, int))
228		def test_check_filter_raises_ValueError_if_filter_is_invalid(x, y):
229		assume(any(i >= j for i, j in zip(x, y)))
	224	@given(x=lists(integers(), min_size=1), data=data()) # Defer data selection for y till test is run.
	225	def test_check_filter_raises_ValueError_if_filter_is_invalid(x, data):
	226	y = [data.draw(integers(max_value=val)) for val in x] # ensure y is element-wise less than or equal to x
230	227	with raises(ValueError) as err:
231	228	check_filter(list(zip(x, y)))
232	229	assert str(err.value) == 'Error in --filter: low >= high'

236	233	assert keep_entry_range('a56b23c89', [0], [100], int, re.compile(r'\d+'))
237	234
238	235
239		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
240		@given((py23_str, integers_in_range(1, 99), py23_str, integers_in_range(1, 99), py23_str))
241		def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_the_range_bounds(x):
242		s = ''.join(map(py23_str, x))
243		assume(any(0 < int(i) < 100 for i in re.findall(r'\d+', s) if re.match(r'\d+$', i)))
244		assert keep_entry_range(s, [0], [100], int, re.compile(r'\d+'))
245
246
247	236	def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_any_range_bounds_example():
248	237	assert keep_entry_range('a56b23c89', [1, 88], [20, 90], int, re.compile(r'\d+'))
249	238
250	239
251		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
252		@given((py23_str, integers_in_range(2, 89), py23_str, integers_in_range(2, 89), py23_str))
253		def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_any_range_bounds(x):
254		s = ''.join(map(py23_str, x))
255		assume(any((1 < int(i) < 20) or (88 < int(i) < 90) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i)))
256		assert keep_entry_range(s, [1, 88], [20, 90], int, re.compile(r'\d+'))
257
258
259	240	def test_keep_entry_range_returns_False_if_no_portion_of_input_is_between_the_range_bounds_example():
260	241	assert not keep_entry_range('a56b23c89', [1], [20], int, re.compile(r'\d+'))
261	242
262	243
263		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
264		@given((py23_str, integers_from(21), py23_str, integers_from(21), py23_str))
265		def test_keep_entry_range_returns_False_if_no_portion_of_input_is_between_the_range_bounds(x):
266		s = ''.join(map(py23_str, x))
267		assume(all(not (1 <= int(i) <= 20) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i)))
268		assert not keep_entry_range(s, [1], [20], int, re.compile(r'\d+'))
269
270
271	244	def test_exclude_entry_returns_True_if_exlcude_parameters_are_not_in_input_example():
272	245	assert exclude_entry('a56b23c89', [100, 45], int, re.compile(r'\d+'))
273	246
274	247
275		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
276		@given((py23_str, integers_from(0), py23_str, integers_from(0), py23_str))
277		def test_exclude_entry_returns_True_if_exlcude_parameters_are_not_in_input(x):
278		s = ''.join(map(py23_str, x))
279		assume(not any(int(i) in (23, 45, 87) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i)))
280		assert exclude_entry(s, [23, 45, 87], int, re.compile(r'\d+'))
281
282
283	248	def test_exclude_entry_returns_False_if_exlcude_parameters_are_in_input_example():
284	249	assert not exclude_entry('a56b23c89', [23], int, re.compile(r'\d+'))
285
286
287		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
288		@given((py23_str, sampled_from([23, 45, 87]), py23_str, sampled_from([23, 45, 87]), py23_str))
289		def test_exclude_entry_returns_False_if_exlcude_parameters_are_in_input(x):
290		s = ''.join(map(py23_str, x))
291		assume(any(int(i) in (23, 45, 87) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i)))
292		assert not exclude_entry(s, [23, 45, 87], int, re.compile(r'\d+'))

-399

~~test_natsort/test_natsort.py~~ less more

0		# -- coding: utf-8 --
1		"""\
2		Here are a collection of examples of how this module can be used.
3		See the README or the natsort homepage for more details.
4		"""
5		from __future__ import unicode_literals, print_function
6		import pytest
7		import sys
8		import warnings
9		import locale
10		from operator import itemgetter
11		from pytest import raises
12		from natsort import (
13		natsorted,
14		index_natsorted,
15		natsort_key,
16		versorted,
17		index_versorted,
18		humansorted,
19		index_humansorted,
20		natsort_keygen,
21		order_by_index,
22		ns,
23		realsorted,
24		index_realsorted,
25		decoder,
26		as_ascii,
27		as_utf8,
28		)
29		from compat.locale import load_locale, has_locale_de_DE
30		from natsort.utils import _natsort_key
31
32
33		def test_decoder_returns_function_that_can_decode_bytes_but_return_non_bytes_as_is():
34		f = decoder('latin1')
35		a = 'bytes'
36		b = 14
37		assert f(b'bytes') == a
38		assert f(b) is b # returns as-is, same object ID
39		if sys.version[0] == '3':
40		assert f(a) is a # same object returned on Python3 b/c only bytes has decode
41		else:
42		assert f(a) is not a
43		assert f(a) == a # not same object on Python2 because str can decode
44
45
46		def test_as_ascii_returns_bytes_as_ascii():
47		assert decoder('ascii')(b'bytes') == as_ascii(b'bytes')
48
49
50		def test_as_utf8_returns_bytes_as_utf8():
51		assert decoder('utf8')(b'bytes') == as_utf8(b'bytes')
52
53
54		def test_natsort_key_public_raises_DeprecationWarning_when_called():
55		# Identical to _natsort_key
56		# But it raises a deprecation warning
57		with warnings.catch_warnings(record=True) as w:
58		warnings.simplefilter("always")
59		assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2', key=None, alg=ns.I)
60		assert len(w) == 1
61		assert "natsort_key is deprecated as of 3.4.0, please use natsort_keygen" in str(w[-1].message)
62		# It is called for each element in a list when sorting
63		with warnings.catch_warnings(record=True) as w:
64		warnings.simplefilter("always")
65		a = ['a2', 'a5', 'a9', 'a1', 'a4', 'a10', 'a6']
66		a.sort(key=natsort_key)
67		assert len(w) == 7
68
69
70		def test_natsort_keygen_returns_natsort_key_with_alg_option():
71		a = 'a-5.034e1'
72		assert natsort_keygen()(a) == _natsort_key(a, None, ns.I)
73		assert natsort_keygen(alg=ns.F \| ns.S)(a) == _natsort_key(a, None, ns.F \| ns.S)
74
75
76		def test_natsort_keygen_with_key_returns_same_result_as_nested_lambda_with_bare_natsort_key():
77		a = 'a-5.034e1'
78		f1 = natsort_keygen(key=lambda x: x.upper())
79
80		def f2(x):
81		return _natsort_key(x, lambda y: y.upper(), ns.I)
82		assert f1(a) == f2(a)
83
84
85		def test_natsort_keygen_returns_key_that_can_be_used_to_sort_list_in_place_with_same_result_as_natsorted():
86		a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
87		b = a[:]
88		a.sort(key=natsort_keygen(alg=ns.F))
89		assert a == natsorted(b, alg=ns.F)
90
91
92		def test_natsorted_returns_strings_with_numbers_in_ascending_order():
93		a = ['a2', 'a5', 'a9', 'a1', 'a4', 'a10', 'a6']
94		assert natsorted(a) == ['a1', 'a2', 'a4', 'a5', 'a6', 'a9', 'a10']
95
96
97		def test_natsorted_returns_list_of_numbers_sorted_as_signed_floats_with_exponents():
98		a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
99		assert natsorted(a, alg=ns.REAL) == ['a-50', 'a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.']
100
101
102		def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_floats_without_exponents_with_NOEXP_option():
103		a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
104		assert natsorted(a, alg=ns.N \| ns.F \| ns.U) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.', 'a-50']
105		# UNSIGNED is default
106		assert natsorted(a, alg=ns.NOEXP \| ns.FLOAT) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.', 'a-50']
107
108
109		def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_INT_option():
110		a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
111		assert natsorted(a, alg=ns.INT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50']
112		# INT is default
113		assert natsorted(a) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50']
114
115
116		def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_DIGIT_and_VERSION_option():
117		a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
118		assert natsorted(a, alg=ns.DIGIT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50']
119		assert natsorted(a, alg=ns.VERSION) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50']
120
121
122		def test_natsorted_returns_list_of_numbers_sorted_as_signed_ints_with_SIGNED_option():
123		a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
124		assert natsorted(a, alg=ns.SIGNED) == ['a-50', 'a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.']
125
126
127		def test_natsorted_returns_list_of_numbers_sorted_accounting_for_sign_with_SIGNED_option():
128		a = ['a-5', 'a7', 'a+2']
129		assert natsorted(a, alg=ns.SIGNED) == ['a-5', 'a+2', 'a7']
130
131
132		def test_natsorted_returns_list_of_numbers_sorted_not_accounting_for_sign_without_SIGNED_option():
133		a = ['a-5', 'a7', 'a+2']
134		assert natsorted(a) == ['a7', 'a+2', 'a-5']
135
136
137		def test_natsorted_returns_sorted_list_of_version_numbers_by_default_or_with_VERSION_option():
138		a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
139		assert natsorted(a) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4']
140		assert natsorted(a, alg=ns.VERSION) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4']
141
142
143		def test_natsorted_returns_sorted_list_with_mixed_type_input_and_does_not_raise_TypeError_on_Python3():
144		# You can mix types with natsorted. This can get around the new
145		# 'unorderable types' issue with Python 3.
146		a = [6, 4.5, '7', '2.5', 'a']
147		assert natsorted(a) == ['2.5', 4.5, 6, '7', 'a']
148		a = [46, '5a5b2', 'af5', '5a5-4']
149		assert natsorted(a) == ['5a5-4', '5a5b2', 46, 'af5']
150
151
152		def test_natsorted_with_mixed_input_returns_sorted_results_without_error():
153		a = ['2', 'ä', 'b', 1.5, 3]
154		assert natsorted(a) == [1.5, '2', 3, 'b', 'ä']
155
156
157		def test_natsorted_with_nan_input_returns_sorted_results_with_nan_last_with_NANLAST():
158		a = ['25', 5, float('nan'), 1E40]
159		# The slice is because NaN != NaN
160		assert natsorted(a, alg=ns.NANLAST)[:3] == [5, '25', 1E40, float('nan')][:3]
161
162
163		def test_natsorted_with_nan_input_returns_sorted_results_with_nan_first_without_NANLAST():
164		a = ['25', 5, float('nan'), 1E40]
165		# The slice is because NaN != NaN
166		assert natsorted(a)[1:] == [float('nan'), 5, '25', 1E40][1:]
167
168
169		def test_natsorted_with_mixed_input_raises_TypeError_if_bytes_type_is_involved_on_Python3():
170		if sys.version[0] == '3':
171		with raises(TypeError) as e:
172		assert natsorted(['ä', b'b'])
173		assert 'bytes' in str(e.value)
174		else:
175		assert True
176
177
178		def test_natsorted_raises_ValueError_for_non_iterable_input():
179		with raises(TypeError) as err:
180		natsorted(100)
181		assert str(err.value) == "'int' object is not iterable"
182
183
184		def test_natsorted_recursivley_applies_key_to_nested_lists_to_return_sorted_nested_list():
185		data = [['a1', 'a5'], ['a1', 'a40'], ['a10', 'a1'], ['a2', 'a5']]
186		assert natsorted(data) == [['a1', 'a5'], ['a1', 'a40'], ['a2', 'a5'], ['a10', 'a1']]
187
188
189		def test_natsorted_applies_key_to_each_list_element_before_sorting_list():
190		b = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')]
191		assert natsorted(b, key=itemgetter(1)) == [('c', 'num2'), ('a', 'num3'), ('b', 'num5')]
192
193
194		def test_natsorted_returns_list_in_reversed_order_with_reverse_option():
195		a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
196		assert natsorted(a, reverse=True) == natsorted(a)[::-1]
197
198
199		def test_natsorted_sorts_OS_generated_paths_incorrectly_without_PATH_option():
200		a = ['/p/Folder (10)/file.tar.gz',
201		'/p/Folder/file.tar.gz',
202		'/p/Folder (1)/file (1).tar.gz',
203		'/p/Folder (1)/file.tar.gz']
204		assert natsorted(a) == ['/p/Folder (1)/file (1).tar.gz',
205		'/p/Folder (1)/file.tar.gz',
206		'/p/Folder (10)/file.tar.gz',
207		'/p/Folder/file.tar.gz']
208
209
210		def test_natsorted_sorts_OS_generated_paths_correctly_with_PATH_option():
211		a = ['/p/Folder (10)/file.tar.gz',
212		'/p/Folder/file.tar.gz',
213		'/p/Folder (1)/file (1).tar.gz',
214		'/p/Folder (1)/file.tar.gz']
215		assert natsorted(a, alg=ns.PATH) == ['/p/Folder/file.tar.gz',
216		'/p/Folder (1)/file.tar.gz',
217		'/p/Folder (1)/file (1).tar.gz',
218		'/p/Folder (10)/file.tar.gz']
219
220
221		def test_natsorted_can_handle_sorting_paths_and_numbers_with_PATH():
222		# You can sort paths and numbers, not that you'd want to
223		a = ['/Folder (9)/file.exe', 43]
224		assert natsorted(a, alg=ns.PATH) == [43, '/Folder (9)/file.exe']
225
226
227		def test_natsorted_returns_results_in_ASCII_order_with_no_case_options():
228		a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
229		assert natsorted(a) == ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn']
230
231
232		def test_natsorted_returns_results_sorted_by_lowercase_ASCII_order_with_IGNORECASE():
233		a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
234		assert natsorted(a, alg=ns.IGNORECASE) == ['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn']
235
236
237		def test_natsorted_returns_results_in_ASCII_order_but_with_lowercase_letters_first_with_LOWERCASEFIRST():
238		a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
239		assert natsorted(a, alg=ns.LOWERCASEFIRST) == ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn']
240
241
242		def test_natsorted_returns_results_with_uppercase_and_lowercase_letters_grouped_together_with_GROUPLETTERS():
243		a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
244		assert natsorted(a, alg=ns.GROUPLETTERS) == ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn']
245
246
247		def test_natsorted_returns_results_in_natural_order_with_GROUPLETTERS_and_LOWERCASEFIRST():
248		a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
249		assert natsorted(a, alg=ns.G \| ns.LF) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn']
250
251
252		def test_natsorted_places_uppercase_letters_before_lowercase_letters_for_nested_input():
253		b = [('A5', 'a6'), ('a3', 'a1')]
254		assert natsorted(b) == [('A5', 'a6'), ('a3', 'a1')]
255
256
257		def test_natsorted_with_LOWERCASEFIRST_places_lowercase_letters_before_uppercase_letters_for_nested_input():
258		b = [('A5', 'a6'), ('a3', 'a1')]
259		assert natsorted(b, alg=ns.LOWERCASEFIRST) == [('a3', 'a1'), ('A5', 'a6')]
260
261
262		def test_natsorted_with_IGNORECASE_sorts_without_regard_to_case_for_nested_input():
263		b = [('A5', 'a6'), ('a3', 'a1')]
264		assert natsorted(b, alg=ns.IGNORECASE) == [('a3', 'a1'), ('A5', 'a6')]
265
266
267		def test_natsorted_with_LOCALE_returns_results_sorted_by_lowercase_first_and_grouped_letters():
268		a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
269		load_locale('en_US')
270		assert natsorted(a, alg=ns.LOCALE) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn']
271		locale.setlocale(locale.LC_ALL, str(''))
272
273
274		def test_natsorted_with_LOCALE_and_CAPITALFIRST_returns_results_sorted_by_capital_first_and_ungrouped():
275		a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
276		load_locale('en_US')
277		assert natsorted(a, alg=ns.LOCALE \| ns.CAPITALFIRST) == ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn']
278		locale.setlocale(locale.LC_ALL, str(''))
279
280
281		def test_natsorted_with_LOCALE_and_LOWERCASEFIRST_returns_results_sorted_by_uppercase_first_and_grouped_letters():
282		a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
283		load_locale('en_US')
284		assert natsorted(a, alg=ns.LOCALE \| ns.LOWERCASEFIRST) == ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn']
285		locale.setlocale(locale.LC_ALL, str(''))
286
287
288		def test_natsorted_with_LOCALE_and_CAPITALFIRST_and_LOWERCASE_returns_results_sorted_by_capital_last_and_ungrouped():
289		a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
290		load_locale('en_US')
291		assert natsorted(a, alg=ns.LOCALE \| ns.CAPITALFIRST \| ns.LOWERCASEFIRST) == ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn']
292		locale.setlocale(locale.LC_ALL, str(''))
293
294
295		def test_natsorted_with_LOCALE_and_en_setting_returns_results_sorted_by_en_language():
296		load_locale('en_US')
297		a = ['c', 'ä', 'b', 'a5,6', 'a5,50']
298		assert natsorted(a, alg=ns.LOCALE \| ns.F) == ['a5,6', 'a5,50', 'ä', 'b', 'c']
299		locale.setlocale(locale.LC_ALL, str(''))
300
301
302		@pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale')
303		def test_natsorted_with_LOCALE_and_de_setting_returns_results_sorted_by_de_language():
304		load_locale('de_DE')
305		a = ['c', 'ä', 'b', 'a5,6', 'a5,50']
306		assert natsorted(a, alg=ns.LOCALE \| ns.F) == ['a5,50', 'a5,6', 'ä', 'b', 'c']
307		locale.setlocale(locale.LC_ALL, str(''))
308
309
310		def test_natsorted_with_LOCALE_and_mixed_input_returns_sorted_results_without_error():
311		load_locale('en_US')
312		a = ['0', 'Á', '2', 'Z']
313		assert natsorted(a) == ['0', '2', 'Z', 'Á']
314		a = ['2', 'ä', 'b', 1.5, 3]
315		assert natsorted(a, alg=ns.LOCALE) == [1.5, '2', 3, 'ä', 'b']
316		locale.setlocale(locale.LC_ALL, str(''))
317
318
319		def test_versorted_returns_results_identical_to_natsorted():
320		a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
321		# versorted is retained for backwards compatibility
322		assert versorted(a) == natsorted(a)
323
324
325		def test_realsorted_returns_results_identical_to_natsorted_with_REAL():
326		a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
327		assert realsorted(a) == natsorted(a, alg=ns.REAL)
328
329
330		def test_humansorted_returns_results_identical_to_natsorted_with_LOCALE():
331		a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
332		assert humansorted(a) == natsorted(a, alg=ns.LOCALE)
333
334
335		def test_index_natsorted_returns_integer_list_of_sort_order_for_input_list():
336		a = ['num3', 'num5', 'num2']
337		b = ['foo', 'bar', 'baz']
338		index = index_natsorted(a)
339		assert index == [2, 0, 1]
340		assert [a[i] for i in index] == ['num2', 'num3', 'num5']
341		assert [b[i] for i in index] == ['baz', 'foo', 'bar']
342
343
344		def test_index_natsorted_returns_reversed_integer_list_of_sort_order_for_input_list_with_reverse_option():
345		a = ['num3', 'num5', 'num2']
346		assert index_natsorted(a, reverse=True) == [1, 0, 2]
347
348
349		def test_index_natsorted_applies_key_function_before_sorting():
350		c = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')]
351		assert index_natsorted(c, key=itemgetter(1)) == [2, 0, 1]
352
353
354		def test_index_natsorted_handles_unorderable_types_error_on_Python3():
355		a = [46, '5a5b2', 'af5', '5a5-4']
356		assert index_natsorted(a) == [3, 1, 0, 2]
357
358
359		def test_index_natsorted_returns_integer_list_of_nested_input_list():
360		data = [['a1', 'a5'], ['a1', 'a40'], ['a10', 'a1'], ['a2', 'a5']]
361		assert index_natsorted(data) == [0, 1, 3, 2]
362
363
364		def test_index_natsorted_returns_integer_list_in_proper_order_for_input_paths_with_PATH():
365		a = ['/p/Folder (10)/',
366		'/p/Folder/',
367		'/p/Folder (1)/']
368		assert index_natsorted(a, alg=ns.PATH) == [1, 2, 0]
369
370
371		def test_index_versorted_returns_results_identical_to_index_natsorted():
372		a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
373		# index_versorted is retained for backwards compatibility
374		assert index_versorted(a) == index_natsorted(a)
375
376
377		def test_index_realsorted_returns_results_identical_to_index_natsorted_with_REAL():
378		a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
379		assert index_realsorted(a) == index_natsorted(a, alg=ns.REAL)
380
381
382		def test_index_humansorted_returns_results_identical_to_index_natsorted_with_LOCALE():
383		a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
384		assert index_humansorted(a) == index_natsorted(a, alg=ns.LOCALE)
385
386
387		def test_order_by_index_sorts_list_according_to_order_of_integer_list():
388		a = ['num3', 'num5', 'num2']
389		index = [2, 0, 1]
390		assert order_by_index(a, index) == ['num2', 'num3', 'num5']
391		assert order_by_index(a, index) == [a[i] for i in index]
392
393
394		def test_order_by_index_returns_generator_with_iter_True():
395		a = ['num3', 'num5', 'num2']
396		index = [2, 0, 1]
397		assert order_by_index(a, index, True) != [a[i] for i in index]
398		assert list(order_by_index(a, index, True)) == [a[i] for i in index]

+105

-0

test_natsort/test_natsort_cmp.py less more

	0	# -- coding: utf-8 --
	1	# pylint: disable=unused-variable
	2	"""These test the natcmp() function.
	3
	4	Note that these tests are only relevant for Python version < 3.
	5	"""
	6	import sys
	7	from functools import partial
	8	from compat.mock import patch
	9
	10	import pytest
	11	from hypothesis import given
	12	from hypothesis.strategies import floats, integers, lists
	13
	14	from natsort import ns
	15
	16	from natsort.compat.py23 import py23_cmp
	17
	18	PY_VERSION = float(sys.version[:3])
	19
	20	if PY_VERSION < 3:
	21	from natsort import natcmp
	22
	23
	24	class Comparable(object):
	25	"""Stub class for testing natcmp functionality."""
	26	def __init__(self, value):
	27	self.value = value
	28
	29	def __cmp__(self, other):
	30	return natcmp(self.value, other.value)
	31
	32
	33	@pytest.mark.skipif(PY_VERSION >= 3.0, reason='cmp() deprecated in Python 3')
	34	def test__classes_can_be_compared():
	35	one = Comparable("1")
	36	two = Comparable("2")
	37	another_two = Comparable("2")
	38	ten = Comparable("10")
	39
	40	assert ten > two == another_two > one
	41
	42
	43	@pytest.mark.skipif(PY_VERSION >= 3.0, reason='cmp() deprecated in Python 3')
	44	def test__keys_are_being_cached():
	45	natcmp.cached_keys = {}
	46	assert len(natcmp.cached_keys) == 0
	47	natcmp(0, 0)
	48	assert len(natcmp.cached_keys) == 1
	49	natcmp(0, 0)
	50	assert len(natcmp.cached_keys) == 1
	51
	52	with patch('natsort.compat.locale.dumb_sort', return_value=False):
	53	natcmp(0, 0, alg=ns.L)
	54	assert len(natcmp.cached_keys) == 2
	55	natcmp(0, 0, alg=ns.L)
	56	assert len(natcmp.cached_keys) == 2
	57
	58	with patch('natsort.compat.locale.dumb_sort', return_value=True):
	59	natcmp(0, 0, alg=ns.L)
	60	assert len(natcmp.cached_keys) == 3
	61	natcmp(0, 0, alg=ns.L)
	62	assert len(natcmp.cached_keys) == 3
	63
	64
	65	@pytest.mark.skipif(PY_VERSION >= 3.0, reason='cmp() deprecated in Python 3')
	66	def test__illegal_algorithm_raises_error():
	67	try:
	68	natcmp(0, 0, alg="Just random stuff")
	69	assert False
	70
	71	except ValueError:
	72	assert True
	73
	74	except Exception:
	75	assert False
	76
	77
	78	@pytest.mark.skipif(PY_VERSION >= 3.0, reason='cmp() deprecated in Python 3')
	79	def test__classes_can_utilize_max_or_min():
	80	comparables = [Comparable(i) for i in range(10)]
	81
	82	assert max(comparables) == comparables[-1]
	83	assert min(comparables) == comparables[0]
	84
	85
	86	@pytest.mark.skipif(PY_VERSION >= 3.0, reason='cmp() deprecated in Python 3')
	87	@given(integers(), integers())
	88	def test__natcmp_works_the_same_for_integers_as_cmp(x, y):
	89	assert py23_cmp(x, y) == natcmp(x, y)
	90
	91
	92	@pytest.mark.skipif(PY_VERSION >= 3.0, reason='cmp() deprecated in Python 3')
	93	@given(floats(allow_nan=False), floats(allow_nan=False))
	94	def test__natcmp_works_the_same_for_floats_as_cmp(x, y):
	95	assert py23_cmp(x, y) == natcmp(x, y)
	96
	97
	98	@pytest.mark.skipif(PY_VERSION >= 3.0, reason='cmp() deprecated in Python 3')
	99	@given(lists(elements=integers()))
	100	def test_sort_strings_with_numbers(a_list):
	101	strings = [str(var) for var in a_list]
	102	natcmp_sorted = sorted(strings, cmp=partial(natcmp, alg=ns.SIGNED))
	103
	104	assert sorted(a_list) == [int(var) for var in natcmp_sorted]

+98

-0

test_natsort/test_natsort_key.py less more

	0	# -- coding: utf-8 --
	1	"""These test the utils.py functions."""
	2	from __future__ import unicode_literals
	3
	4	import pytest
	5	from natsort.compat.py23 import PY_VERSION
	6	from natsort.ns_enum import ns
	7	from natsort.utils import (
	8	_natsort_key,
	9	_regex_chooser,
	10	_parse_string_factory,
	11	_parse_path_factory,
	12	_parse_number_factory,
	13	_parse_bytes_factory,
	14	_input_string_transform_factory,
	15	_string_component_transform_factory,
	16	_final_data_transform_factory,
	17	)
	18	from hypothesis import (
	19	given,
	20	)
	21	from hypothesis.strategies import (
	22	lists,
	23	text,
	24	floats,
	25	integers,
	26	binary,
	27	)
	28
	29	if PY_VERSION >= 3:
	30	long = int
	31
	32
	33	regex = _regex_chooser[ns.INT]
	34	pre = _input_string_transform_factory(ns.INT)
	35	post = _string_component_transform_factory(ns.INT)
	36	after = _final_data_transform_factory(ns.INT, '', '')
	37	string_func = _parse_string_factory(ns.INT, '', regex.split, pre, post, after)
	38	bytes_func = _parse_bytes_factory(ns.INT)
	39	num_func = _parse_number_factory(ns.INT, '', '')
	40
	41
	42	def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple():
	43	# It gracefully handles as_path for numeric input by putting an extra tuple around it
	44	# so it will sort against the other as_path results.
	45	sfunc = _parse_path_factory(string_func)
	46	bytes_func = _parse_bytes_factory(ns.PATH)
	47	num_func = _parse_number_factory(ns.PATH, '', '')
	48	assert _natsort_key(10, None, sfunc, bytes_func, num_func) == (('', 10),)
	49
	50
	51	@pytest.mark.skipif(PY_VERSION < 3, reason='only valid on python3')
	52	def test__natsort_key_with_bytes_input_and_PATH_returns_number_in_nested_tuple():
	53	# It gracefully handles as_path for numeric input by putting an extra tuple around it
	54	# so it will sort against the other as_path results.
	55	sfunc = _parse_path_factory(string_func)
	56	bytes_func = _parse_bytes_factory(ns.PATH)
	57	num_func = _parse_number_factory(ns.PATH, '', '')
	58	assert _natsort_key(b'/hello/world', None, sfunc, bytes_func, num_func) == ((b'/hello/world',),)
	59
	60
	61	def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple():
	62	# PATH also handles recursion well.
	63	sfunc = _parse_path_factory(string_func)
	64	bytes_func = _parse_bytes_factory(ns.PATH)
	65	num_func = _parse_number_factory(ns.PATH, '', '')
	66	assert _natsort_key(('/Folder', '/Folder (1)'), None, sfunc, bytes_func, num_func) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1, ')')))
	67
	68
	69	# The remaining tests provide no examples, just hypothesis tests.
	70	# They only confirm that _natsort_key uses the above building blocks.
	71
	72
	73	@given(floats(allow_nan=False) \| integers())
	74	def test__natsort_key_with_numeric_input_takes_number_path(x):
	75	assert _natsort_key(x, None, string_func, bytes_func, num_func) == num_func(x)
	76
	77
	78	@pytest.mark.skipif(PY_VERSION < 3, reason='only valid on python3')
	79	@given(binary().filter(bool))
	80	def test__natsort_key_with_bytes_input_takes_bytes_path(x):
	81	assert _natsort_key(x, None, string_func, bytes_func, num_func) == bytes_func(x)
	82
	83
	84	@given(lists(elements=floats(allow_nan=False) \| text() \| integers(), min_size=1, max_size=10))
	85	def test__natsort_key_with_text_input_takes_string_path(x):
	86	s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
	87	assert _natsort_key(s, None, string_func, bytes_func, num_func) == string_func(s)
	88
	89
	90	@given(lists(elements=text(), min_size=1, max_size=10))
	91	def test__natsort_key_with_nested_input_takes_nested_path(x):
	92	assert _natsort_key(x, None, string_func, bytes_func, num_func) == tuple(string_func(s) for s in x)
	93
	94
	95	@given(text())
	96	def test__natsort_key_with_key_argument_applies_key_before_processing(x):
	97	assert _natsort_key(x, len, string_func, bytes_func, num_func) == num_func(len(x))

+105

-0

test_natsort/test_natsort_keygen.py less more

	0	# -- coding: utf-8 --
	1	"""\
	2	Here are a collection of examples of how this module can be used.
	3	See the README or the natsort homepage for more details.
	4	"""
	5	from __future__ import unicode_literals, print_function
	6
	7	import warnings
	8	import locale
	9	from pytest import raises
	10	from natsort import (
	11	natsorted,
	12	natsort_key,
	13	natsort_keygen,
	14	ns,
	15	)
	16	from natsort.compat.py23 import PY_VERSION
	17	from natsort.compat.locale import (
	18	null_string_locale,
	19	get_strxfrm,
	20	)
	21	from compat.mock import patch
	22	from compat.locale import load_locale
	23
	24	INPUT = ['6A-5.034e+1', '/Folder (1)/Foo', 56.7]
	25
	26
	27	def test_natsort_key_public_raises_DeprecationWarning_when_called():
	28	# But it raises a deprecation warning
	29	with warnings.catch_warnings(record=True) as w:
	30	warnings.simplefilter("always")
	31	assert natsort_key('a-5.034e2') == ('a-', 5, '.', 34, 'e', 2)
	32	assert len(w) == 1
	33	assert "natsort_key is deprecated as of 3.4.0, please use natsort_keygen" in str(w[-1].message)
	34	# It is called for each element in a list when sorting
	35	with warnings.catch_warnings(record=True) as w:
	36	warnings.simplefilter("always")
	37	a = ['a2', 'a5', 'a9', 'a1', 'a4', 'a10', 'a6']
	38	a.sort(key=natsort_key)
	39	assert len(w) == 7
	40
	41
	42	def test_natsort_keygen_with_invalid_alg_input_raises_ValueError():
	43	# Invalid arguments give the correct response
	44	with raises(ValueError) as err:
	45	natsort_keygen(None, '1')
	46	assert str(err.value) == "natsort_keygen: 'alg' argument must be from the enum 'ns', got 1"
	47
	48
	49	def test_natsort_keygen_returns_natsort_key_that_parses_input():
	50	a = 'a-5.034e1'
	51	assert natsort_keygen()(a) == ('a-', 5, '.', 34, 'e', 1)
	52	assert natsort_keygen(alg=ns.F \| ns.S)(a) == ('a', -50.34)
	53
	54
	55	def test_natsort_keygen_returns_key_that_can_be_used_to_sort_list_in_place_with_same_result_as_natsorted():
	56	a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
	57	b = a[:]
	58	a.sort(key=natsort_keygen(alg=ns.F))
	59	assert a == natsorted(b, alg=ns.F)
	60
	61
	62	def test_natsort_keygen_splits_input_with_defaults():
	63	assert natsort_keygen()(INPUT) == (('', 6, 'A-', 5, '.', 34, 'e+', 1), ('/Folder (', 1, ')/Foo'), ('', 56.7))
	64	if PY_VERSION >= 3: assert natsort_keygen()(b'6A-5.034e+1') == (b'6A-5.034e+1',)
	65
	66
	67	def test_natsort_keygen_splits_input_with_real():
	68	assert natsort_keygen(alg=ns.R)(INPUT) == (('', 6.0, 'A', -50.34), ('/Folder (', 1.0, ')/Foo'), ('', 56.7))
	69	if PY_VERSION >= 3: assert natsort_keygen(alg=ns.R)(b'6A-5.034e+1') == (b'6A-5.034e+1',)
	70
	71
	72	def test_natsort_keygen_splits_input_with_lowercasefirst_noexp_float():
	73	assert natsort_keygen(alg=ns.LF \| ns.F \| ns.N)(INPUT) == (('', 6.0, 'a-', 5.034, 'E+', 1.0), ('/fOLDER (', 1.0, ')/fOO'), ('', 56.7))
	74	if PY_VERSION >= 3: assert natsort_keygen(alg=ns.LF \| ns.F \| ns.N)(b'6A-5.034e+1') == (b'6A-5.034e+1',)
	75
	76
	77	def test_natsort_keygen_splits_input_with_locale():
	78	load_locale('en_US')
	79	strxfrm = get_strxfrm()
	80	with patch('natsort.compat.locale.dumb_sort', return_value=False):
	81	assert natsort_keygen(alg=ns.L)(INPUT) == ((null_string_locale, 6, strxfrm('A-'), 5, strxfrm('.'), 34, strxfrm('e+'), 1), (strxfrm('/Folder ('), 1, strxfrm(')/Foo')), (null_string_locale, 56.7))
	82	with patch('natsort.compat.locale.dumb_sort', return_value=True):
	83	assert natsort_keygen(alg=ns.L)(INPUT) == ((null_string_locale, 6, strxfrm('aa--'), 5, strxfrm('..'), 34, strxfrm('eE++'), 1), (strxfrm('//ffoOlLdDeErR (('), 1, strxfrm('))//ffoOoO')), (null_string_locale, 56.7))
	84	if PY_VERSION >= 3: assert natsort_keygen(alg=ns.LA)(b'6A-5.034e+1') == (b'6A-5.034e+1',)
	85	locale.setlocale(locale.LC_ALL, str(''))
	86
	87
	88	def test_natsort_keygen_splits_input_with_locale_and_capitalfirst():
	89	load_locale('en_US')
	90	strxfrm = get_strxfrm()
	91	with patch('natsort.compat.locale.dumb_sort', return_value=False):
	92	assert natsort_keygen(alg=ns.LA \| ns.C)(INPUT) == ((('',), (null_string_locale, 6, strxfrm('A-'), 5, strxfrm('.'), 34, strxfrm('e+'), 1)), (('/',), (strxfrm('/Folder ('), 1, strxfrm(')/Foo'))), (('',), (null_string_locale, 56.7)))
	93	if PY_VERSION >= 3: assert natsort_keygen(alg=ns.LA \| ns.C)(b'6A-5.034e+1') == (b'6A-5.034e+1',)
	94	locale.setlocale(locale.LC_ALL, str(''))
	95
	96
	97	def test_natsort_keygen_splits_input_with_path():
	98	assert natsort_keygen(alg=ns.P \| ns.G)(INPUT) == ((('', 6, 'aA--', 5, '..', 34, 'ee++', 1),), (('//',), ('fFoollddeerr ((', 1, '))'), ('fFoooo',)), (('', 56.7),))
	99	if PY_VERSION >= 3: assert natsort_keygen(alg=ns.P \| ns.G)(b'6A-5.034e+1') == ((b'6A-5.034e+1',),)
	100
	101
	102	def test_natsort_keygen_splits_input_with_ignorecase():
	103	assert natsort_keygen(alg=ns.IC)(INPUT) == (('', 6, 'a-', 5, '.', 34, 'e+', 1), ('/folder (', 1, ')/foo'), ('', 56.7))
	104	if PY_VERSION >= 3: assert natsort_keygen(alg=ns.IC)(b'6A-5.034e+1') == (b'6a-5.034e+1',)

+292

-0

test_natsort/test_natsorted.py less more

	0	# -*- coding: utf-8 -*-
	1	"""\
	2	Here is a collection of examples of how this module can be used.
	3	See the README or the natsort homepage for more details.
	4	"""
	5	from __future__ import unicode_literals, print_function
	6	import pytest
	7	import locale
	8	from natsort.compat.py23 import PY_VERSION
	9	from operator import itemgetter
	10	from pytest import raises
	11	from natsort import (
	12	natsorted,
	13	ns,
	14	)
	15	from compat.locale import (
	16	load_locale,
	17	has_locale_de_DE,
	18	)
	19
	20
	21	def test_natsorted_returns_strings_with_numbers_in_ascending_order():
	22	a = ['a2', 'a5', 'a9', 'a1', 'a4', 'a10', 'a6']
	23	assert natsorted(a) == ['a1', 'a2', 'a4', 'a5', 'a6', 'a9', 'a10']
	24
	25
	26	def test_natsorted_returns_list_of_numbers_sorted_as_signed_floats_with_exponents():
	27	a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
	28	assert natsorted(a, alg=ns.REAL) == ['a-50', 'a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.']
	29
	30
	31	def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_floats_without_exponents_with_NOEXP_option():
	32	a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
	33	assert natsorted(a, alg=ns.N \| ns.F \| ns.U) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.', 'a-50']
	34	# UNSIGNED is default
	35	assert natsorted(a, alg=ns.NOEXP \| ns.FLOAT) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.', 'a-50']
	36
	37
	38	def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_INT_option():
	39	a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
	40	assert natsorted(a, alg=ns.INT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50']
	41	# INT is default
	42	assert natsorted(a) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50']
	43
	44
	45	def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_DIGIT_and_VERSION_option():
	46	a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
	47	assert natsorted(a, alg=ns.DIGIT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50']
	48	assert natsorted(a, alg=ns.VERSION) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50']
	49
	50
	51	def test_natsorted_returns_list_of_numbers_sorted_as_signed_ints_with_SIGNED_option():
	52	a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
	53	assert natsorted(a, alg=ns.SIGNED) == ['a-50', 'a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.']
	54
	55
	56	def test_natsorted_returns_list_of_numbers_sorted_accounting_for_sign_with_SIGNED_option():
	57	a = ['a-5', 'a7', 'a+2']
	58	assert natsorted(a, alg=ns.SIGNED) == ['a-5', 'a+2', 'a7']
	59
	60
	61	def test_natsorted_returns_list_of_numbers_sorted_not_accounting_for_sign_without_SIGNED_option():
	62	a = ['a-5', 'a7', 'a+2']
	63	assert natsorted(a) == ['a7', 'a+2', 'a-5']
	64
	65
	66	def test_natsorted_returns_sorted_list_of_version_numbers_by_default_or_with_VERSION_option():
	67	a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
	68	assert natsorted(a) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4']
	69	assert natsorted(a, alg=ns.VERSION) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4']
	70
	71
	72	def test_natsorted_returns_sorted_list_with_mixed_type_input_and_does_not_raise_TypeError_on_Python3():
	73	# You can mix types with natsorted. This can get around the new
	74	# 'unorderable types' issue with Python 3.
	75	a = [6, 4.5, '7', '2.5', 'a']
	76	assert natsorted(a) == ['2.5', 4.5, 6, '7', 'a']
	77	a = [46, '5a5b2', 'af5', '5a5-4']
	78	assert natsorted(a) == ['5a5-4', '5a5b2', 46, 'af5']
	79
	80
	81	def test_natsorted_with_mixed_input_returns_sorted_results_without_error():
	82	a = ['0', 'Á', '2', 'Z']
	83	assert natsorted(a) == ['0', '2', 'Á', 'Z']
	84	assert natsorted(a, alg=ns.NUMAFTER) == ['Á', 'Z', '0', '2']
	85	a = ['2', 'ä', 'b', 1.5, 3]
	86	assert natsorted(a) == [1.5, '2', 3, 'ä', 'b']
	87	assert natsorted(a, alg=ns.NUMAFTER) == ['ä', 'b', 1.5, '2', 3]
	88
	89
	90	def test_natsorted_with_nan_input_returns_sorted_results_with_nan_last_with_NANLAST():
	91	a = ['25', 5, float('nan'), 1E40]
	92	# The slice is because NaN != NaN
	93	assert natsorted(a, alg=ns.NANLAST)[:3] == [5, '25', 1E40, float('nan')][:3]
	94
	95
	96	def test_natsorted_with_nan_input_returns_sorted_results_with_nan_first_without_NANLAST():
	97	a = ['25', 5, float('nan'), 1E40]
	98	# The slice is because NaN != NaN
	99	assert natsorted(a)[1:] == [float('nan'), 5, '25', 1E40][1:]
	100
	101
	102	def test_natsorted_with_mixed_input_raises_TypeError_if_bytes_type_is_involved_on_Python3():
	103	if PY_VERSION >= 3:
	104	with raises(TypeError) as e:
	105	assert natsorted(['ä', b'b'])
	106	assert 'bytes' in str(e.value)
	107	else:
	108	assert True
	109
	110
	111	def test_natsorted_raises_ValueError_for_non_iterable_input():
	112	with raises(TypeError) as err:
	113	natsorted(100)
	114	assert str(err.value) == "'int' object is not iterable"
	115
	116
	117	def test_natsorted_recursivley_applies_key_to_nested_lists_to_return_sorted_nested_list():
	118	data = [['a1', 'a5'], ['a1', 'a40'], ['a10', 'a1'], ['a2', 'a5']]
	119	assert natsorted(data) == [['a1', 'a5'], ['a1', 'a40'], ['a2', 'a5'], ['a10', 'a1']]
	120
	121
	122	def test_natsorted_applies_key_to_each_list_element_before_sorting_list():
	123	b = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')]
	124	assert natsorted(b, key=itemgetter(1)) == [('c', 'num2'), ('a', 'num3'), ('b', 'num5')]
	125
	126
	127	def test_natsorted_returns_list_in_reversed_order_with_reverse_option():
	128	a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
	129	assert natsorted(a, reverse=True) == natsorted(a)[::-1]
	130
	131
	132	def test_natsorted_sorts_OS_generated_paths_incorrectly_without_PATH_option():
	133	a = ['/p/Folder (10)/file.tar.gz',
	134	'/p/Folder/file.tar.gz',
	135	'/p/Folder (1)/file (1).tar.gz',
	136	'/p/Folder (1)/file.tar.gz']
	137	assert natsorted(a) == ['/p/Folder (1)/file (1).tar.gz',
	138	'/p/Folder (1)/file.tar.gz',
	139	'/p/Folder (10)/file.tar.gz',
	140	'/p/Folder/file.tar.gz']
	141
	142
	143	def test_natsorted_sorts_OS_generated_paths_correctly_with_PATH_option():
	144	a = ['/p/Folder (10)/file.tar.gz',
	145	'/p/Folder/file.tar.gz',
	146	'/p/Folder (1)/file (1).tar.gz',
	147	'/p/Folder (1)/file.tar.gz']
	148	assert natsorted(a, alg=ns.PATH) == ['/p/Folder/file.tar.gz',
	149	'/p/Folder (1)/file.tar.gz',
	150	'/p/Folder (1)/file (1).tar.gz',
	151	'/p/Folder (10)/file.tar.gz']
	152
	153
	154	def test_natsorted_can_handle_sorting_paths_and_numbers_with_PATH():
	155	# You can sort paths and numbers, not that you'd want to
	156	a = ['/Folder (9)/file.exe', 43]
	157	assert natsorted(a, alg=ns.PATH) == [43, '/Folder (9)/file.exe']
	158
	159
	160	def test_natsorted_returns_results_in_ASCII_order_with_no_case_options():
	161	a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
	162	assert natsorted(a) == ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn']
	163
	164
	165	def test_natsorted_returns_results_sorted_by_lowercase_ASCII_order_with_IGNORECASE():
	166	a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
	167	assert natsorted(a, alg=ns.IGNORECASE) == ['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn']
	168
	169
	170	def test_natsorted_returns_results_in_ASCII_order_but_with_lowercase_letters_first_with_LOWERCASEFIRST():
	171	a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
	172	assert natsorted(a, alg=ns.LOWERCASEFIRST) == ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn']
	173
	174
	175	def test_natsorted_returns_results_with_uppercase_and_lowercase_letters_grouped_together_with_GROUPLETTERS():
	176	a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
	177	assert natsorted(a, alg=ns.GROUPLETTERS) == ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn']
	178
	179
	180	def test_natsorted_returns_results_in_natural_order_with_GROUPLETTERS_and_LOWERCASEFIRST():
	181	a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
	182	assert natsorted(a, alg=ns.G \| ns.LF) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn']
	183
	184
	185	def test_natsorted_places_uppercase_letters_before_lowercase_letters_for_nested_input():
	186	b = [('A5', 'a6'), ('a3', 'a1')]
	187	assert natsorted(b) == [('A5', 'a6'), ('a3', 'a1')]
	188
	189
	190	def test_natsorted_with_LOWERCASEFIRST_places_lowercase_letters_before_uppercase_letters_for_nested_input():
	191	b = [('A5', 'a6'), ('a3', 'a1')]
	192	assert natsorted(b, alg=ns.LOWERCASEFIRST) == [('a3', 'a1'), ('A5', 'a6')]
	193
	194
	195	def test_natsorted_with_IGNORECASE_sorts_without_regard_to_case_for_nested_input():
	196	b = [('A5', 'a6'), ('a3', 'a1')]
	197	assert natsorted(b, alg=ns.IGNORECASE) == [('a3', 'a1'), ('A5', 'a6')]
	198
	199
	200	def test_natsorted_with_LOCALE_returns_results_sorted_by_lowercase_first_and_grouped_letters():
	201	a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
	202	load_locale('en_US')
	203	assert natsorted(a, alg=ns.LOCALE) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn']
	204	locale.setlocale(locale.LC_ALL, str(''))
	205
	206
	207	def test_natsorted_with_LOCALE_and_CAPITALFIRST_returns_results_sorted_by_capital_first_and_ungrouped():
	208	a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
	209	load_locale('en_US')
	210	assert natsorted(a, alg=ns.LOCALE \| ns.CAPITALFIRST) == ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn']
	211	locale.setlocale(locale.LC_ALL, str(''))
	212
	213
	214	def test_natsorted_with_LOCALE_and_LOWERCASEFIRST_returns_results_sorted_by_uppercase_first_and_grouped_letters():
	215	a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
	216	load_locale('en_US')
	217	assert natsorted(a, alg=ns.LOCALE \| ns.LOWERCASEFIRST) == ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn']
	218	locale.setlocale(locale.LC_ALL, str(''))
	219
	220
	221	def test_natsorted_with_LOCALE_and_CAPITALFIRST_and_LOWERCASE_returns_results_sorted_by_capital_last_and_ungrouped():
	222	a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
	223	load_locale('en_US')
	224	assert natsorted(a, alg=ns.LOCALE \| ns.CAPITALFIRST \| ns.LOWERCASEFIRST) == ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn']
	225	locale.setlocale(locale.LC_ALL, str(''))
	226
	227
	228	def test_natsorted_with_LOCALE_and_en_setting_returns_results_sorted_by_en_language():
	229	load_locale('en_US')
	230	a = ['c', 'a5,467.86', 'ä', 'b', 'a5367.86', 'a5,6', 'a5,50']
	231	assert natsorted(a, alg=ns.LOCALE \| ns.F) == ['a5,6', 'a5,50', 'a5367.86', 'a5,467.86', 'ä', 'b', 'c']
	232	locale.setlocale(locale.LC_ALL, str(''))
	233
	234
	235	@pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale and working locale')
	236	def test_natsorted_with_LOCALE_and_de_setting_returns_results_sorted_by_de_language():
	237	load_locale('de_DE')
	238	a = ['c', 'a5.467,86', 'ä', 'b', 'a5367.86', 'a5,6', 'a5,50']
	239	assert natsorted(a, alg=ns.LOCALE \| ns.F) == ['a5,50', 'a5,6', 'a5367.86', 'a5.467,86', 'ä', 'b', 'c']
	240	locale.setlocale(locale.LC_ALL, str(''))
	241
	242
	243	def test_natsorted_with_LOCALE_and_mixed_input_returns_sorted_results_without_error():
	244	load_locale('en_US')
	245	a = ['0', 'Á', '2', 'Z']
	246	assert natsorted(a, alg=ns.LOCALE) == ['0', '2', 'Á', 'Z']
	247	assert natsorted(a, alg=ns.LOCALE \| ns.NUMAFTER) == ['Á', 'Z', '0', '2']
	248	a = ['2', 'ä', 'b', 1.5, 3]
	249	assert natsorted(a, alg=ns.LOCALE) == [1.5, '2', 3, 'ä', 'b']
	250	assert natsorted(a, alg=ns.LOCALE \| ns.NUMAFTER) == ['ä', 'b', 1.5, '2', 3]
	251	locale.setlocale(locale.LC_ALL, str(''))
	252
	253
	254	def test_natsorted_with_LOCALE_and_UNGROUPLETTERS_and_mixed_input_returns_sorted_results_without_error():
	255	load_locale('en_US')
	256	a = ['0', 'Á', '2', 'Z']
	257	assert natsorted(a, alg=ns.LOCALE \| ns.UNGROUPLETTERS) == ['0', '2', 'Á', 'Z']
	258	assert natsorted(a, alg=ns.LOCALE \| ns.UNGROUPLETTERS \| ns.NUMAFTER) == ['Á', 'Z', '0', '2']
	259	a = ['2', 'ä', 'b', 1.5, 3]
	260	assert natsorted(a, alg=ns.LOCALE \| ns.UNGROUPLETTERS) == [1.5, '2', 3, 'ä', 'b']
	261	assert natsorted(a, alg=ns.LOCALE \| ns.UNGROUPLETTERS \| ns.NUMAFTER) == ['ä', 'b', 1.5, '2', 3]
	262	locale.setlocale(locale.LC_ALL, str(''))
	263
	264
	265	def test_natsorted_with_PATH_and_LOCALE_and_UNGROUPLETTERS_and_mixed_input_returns_sorted_results_without_error():
	266	load_locale('en_US')
	267	a = ['0', 'Á', '2', 'Z']
	268	assert natsorted(a, alg=ns.PATH \| ns.LOCALE \| ns.UNGROUPLETTERS) == ['0', '2', 'Á', 'Z']
	269	assert natsorted(a, alg=ns.PATH \| ns.LOCALE \| ns.UNGROUPLETTERS \| ns.NUMAFTER) == ['Á', 'Z', '0', '2']
	270	a = ['2', 'ä', 'b', 1.5, 3]
	271	assert natsorted(a, alg=ns.PATH \| ns.LOCALE \| ns.UNGROUPLETTERS) == [1.5, '2', 3, 'ä', 'b']
	272	assert natsorted(a, alg=ns.PATH \| ns.LOCALE \| ns.UNGROUPLETTERS \| ns.NUMAFTER) == ['ä', 'b', 1.5, '2', 3]
	273	locale.setlocale(locale.LC_ALL, str(''))
	274
	275
	276	def test_natsorted_sorts_an_odd_collection_of_string():
	277	a = ['Corn', 'apple', 'Banana', '73', 'Apple', '5039', 'corn', '~~~~~~', 'banana']
	278	assert natsorted(a) == ['73', '5039', 'Apple', 'Banana', 'Corn',
	279	'apple', 'banana', 'corn', '~~~~~~']
	280	assert natsorted(a, alg=ns.NUMAFTER) == ['Apple', 'Banana', 'Corn',
	281	'apple', 'banana', 'corn', '~~~~~~', '73', '5039']
	282
	283
	284	def test_natsorted_sorts_mixed_ascii_and_non_ascii_numbers():
	285	a = ['1st street', '10th street', '2nd street', '2 street', '1 street', '1street',
	286	'11 street', 'street 2', 'street 1', 'Street 11', '۲ street', '۱ street', '۱street',
	287	'۱۲street', '۱۱ street', 'street ۲', 'street ۱', 'street ۱', 'street ۱۲', 'street ۱۱']
	288	expected = ['1 street', '۱ street', '1st street', '1street', '۱street', '2 street', '۲ street',
	289	'2nd street', '10th street', '11 street', '۱۱ street', '۱۲street', 'street 1',
	290	'street ۱', 'street ۱', 'street 2', 'street ۲', 'Street 11', 'street ۱۱', 'street ۱۲']
	291	assert natsorted(a, alg=ns.IGNORECASE) == expected

+126

-0

test_natsort/test_natsorted_convenience.py less more

	0	# -*- coding: utf-8 -*-
	1	"""\
	2	Here is a collection of examples of how this module can be used.
	3	See the README or the natsort homepage for more details.
	4	"""
	5	from __future__ import unicode_literals, print_function
	6	from operator import itemgetter
	7	from natsort.compat.py23 import PY_VERSION
	8	from natsort import (
	9	natsorted,
	10	index_natsorted,
	11	versorted,
	12	index_versorted,
	13	humansorted,
	14	index_humansorted,
	15	realsorted,
	16	index_realsorted,
	17	order_by_index,
	18	ns,
	19	decoder,
	20	as_ascii,
	21	as_utf8,
	22	)
	23
	24
	25	def test_decoder_returns_function_that_can_decode_bytes_but_return_non_bytes_as_is():
	26	f = decoder('latin1')
	27	a = 'bytes'
	28	b = 14
	29	assert f(b'bytes') == a
	30	assert f(b) is b # returns as-is, same object ID
	31	if PY_VERSION >= 3:
	32	assert f(a) is a # same object returned on Python3 b/c only bytes has decode
	33	else:
	34	assert f(a) is not a
	35	assert f(a) == a # not same object on Python2 because str can decode
	36
	37
	38	def test_as_ascii_returns_bytes_as_ascii():
	39	assert decoder('ascii')(b'bytes') == as_ascii(b'bytes')
	40
	41
	42	def test_as_utf8_returns_bytes_as_utf8():
	43	assert decoder('utf8')(b'bytes') == as_utf8(b'bytes')
	44
	45
	46	def test_versorted_returns_results_identical_to_natsorted():
	47	a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
	48	# versorted is retained for backwards compatibility
	49	assert versorted(a) == natsorted(a)
	50
	51
	52	def test_realsorted_returns_results_identical_to_natsorted_with_REAL():
	53	a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
	54	assert realsorted(a) == natsorted(a, alg=ns.REAL)
	55
	56
	57	def test_humansorted_returns_results_identical_to_natsorted_with_LOCALE():
	58	a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
	59	assert humansorted(a) == natsorted(a, alg=ns.LOCALE)
	60
	61
	62	def test_index_natsorted_returns_integer_list_of_sort_order_for_input_list():
	63	a = ['num3', 'num5', 'num2']
	64	b = ['foo', 'bar', 'baz']
	65	index = index_natsorted(a)
	66	assert index == [2, 0, 1]
	67	assert [a[i] for i in index] == ['num2', 'num3', 'num5']
	68	assert [b[i] for i in index] == ['baz', 'foo', 'bar']
	69
	70
	71	def test_index_natsorted_returns_reversed_integer_list_of_sort_order_for_input_list_with_reverse_option():
	72	a = ['num3', 'num5', 'num2']
	73	assert index_natsorted(a, reverse=True) == [1, 0, 2]
	74
	75
	76	def test_index_natsorted_applies_key_function_before_sorting():
	77	c = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')]
	78	assert index_natsorted(c, key=itemgetter(1)) == [2, 0, 1]
	79
	80
	81	def test_index_natsorted_handles_unorderable_types_error_on_Python3():
	82	a = [46, '5a5b2', 'af5', '5a5-4']
	83	assert index_natsorted(a) == [3, 1, 0, 2]
	84
	85
	86	def test_index_natsorted_returns_integer_list_of_nested_input_list():
	87	data = [['a1', 'a5'], ['a1', 'a40'], ['a10', 'a1'], ['a2', 'a5']]
	88	assert index_natsorted(data) == [0, 1, 3, 2]
	89
	90
	91	def test_index_natsorted_returns_integer_list_in_proper_order_for_input_paths_with_PATH():
	92	a = ['/p/Folder (10)/',
	93	'/p/Folder/',
	94	'/p/Folder (1)/']
	95	assert index_natsorted(a, alg=ns.PATH) == [1, 2, 0]
	96
	97
	98	def test_index_versorted_returns_results_identical_to_index_natsorted():
	99	a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
	100	# index_versorted is retained for backwards compatibility
	101	assert index_versorted(a) == index_natsorted(a)
	102
	103
	104	def test_index_realsorted_returns_results_identical_to_index_natsorted_with_REAL():
	105	a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
	106	assert index_realsorted(a) == index_natsorted(a, alg=ns.REAL)
	107
	108
	109	def test_index_humansorted_returns_results_identical_to_index_natsorted_with_LOCALE():
	110	a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
	111	assert index_humansorted(a) == index_natsorted(a, alg=ns.LOCALE)
	112
	113
	114	def test_order_by_index_sorts_list_according_to_order_of_integer_list():
	115	a = ['num3', 'num5', 'num2']
	116	index = [2, 0, 1]
	117	assert order_by_index(a, index) == ['num2', 'num3', 'num5']
	118	assert order_by_index(a, index) == [a[i] for i in index]
	119
	120
	121	def test_order_by_index_returns_generator_with_iter_True():
	122	a = ['num3', 'num5', 'num2']
	123	index = [2, 0, 1]
	124	assert order_by_index(a, index, True) != [a[i] for i in index]
	125	assert list(order_by_index(a, index, True)) == [a[i] for i in index]

+48

-0

test_natsort/test_parse_bytes_function.py less more

	0	# -*- coding: utf-8 -*-
	1	"""These test the utils.py functions."""
	2	from __future__ import unicode_literals
	3
	4	from natsort.ns_enum import ns
	5	from natsort.utils import _parse_bytes_factory
	6	from hypothesis import given
	7	from hypothesis.strategies import binary
	8
	9
	10	# Each test has an "example" version for demonstrative purposes,
	11	# and a test that uses the hypothesis module.
	12
	13
	14	def test_parse_bytes_factory_makes_function_that_returns_tuple_example():
	15	assert _parse_bytes_factory(0)(b'hello') == (b'hello',)
	16
	17
	18	@given(binary())
	19	def test_parse_bytes_factory_makes_function_that_returns_tuple(x):
	20	assert _parse_bytes_factory(0)(x) == (x,)
	21
	22
	23	def test_parse_bytes_factory_with_IGNORECASE_makes_function_that_returns_tuple_with_lowercase_example():
	24	assert _parse_bytes_factory(ns.IGNORECASE)(b'HelLo') == (b'hello',)
	25
	26
	27	@given(binary())
	28	def test_parse_bytes_factory_with_IGNORECASE_makes_function_that_returns_tuple_with_lowercase(x):
	29	assert _parse_bytes_factory(ns.IGNORECASE)(x) == (x.lower(),)
	30
	31
	32	def test_parse_bytes_factory_with_PATH_makes_function_that_returns_nested_tuple_example():
	33	assert _parse_bytes_factory(ns.PATH)(b'hello') == ((b'hello',),)
	34
	35
	36	@given(binary())
	37	def test_parse_bytes_factory_with_PATH_makes_function_that_returns_nested_tuple(x):
	38	assert _parse_bytes_factory(ns.PATH)(x) == ((x,),)
	39
	40
	41	def test_parse_bytes_factory_with_PATH_and_IGNORECASE_makes_function_that_returns_nested_tuple_with_lowercase_example():
	42	assert _parse_bytes_factory(ns.PATH \| ns.IGNORECASE)(b'HelLo') == ((b'hello',),)
	43
	44
	45	@given(binary())
	46	def test_parse_bytes_factory_with_PATH_and_IGNORECASE_makes_function_that_returns_nested_tuple_with_lowercase(x):
	47	assert _parse_bytes_factory(ns.PATH \| ns.IGNORECASE)(x) == ((x.lower(),),)

+55

-0

test_natsort/test_parse_number_function.py less more

	0	# -*- coding: utf-8 -*-
	1	"""These test the utils.py functions."""
	2	from __future__ import unicode_literals
	3
	4	from natsort.ns_enum import ns
	5	from natsort.utils import _parse_number_factory
	6	from hypothesis import (
	7	given,
	8	)
	9	from hypothesis.strategies import (
	10	floats,
	11	integers,
	12	)
	13
	14
	15	# Each test has an "example" version for demonstrative purposes,
	16	# and a test that uses the hypothesis module.
	17
	18
	19	def test_parse_number_factory_makes_function_that_returns_tuple_example():
	20	assert _parse_number_factory(0, '', '')(57) == ('', 57)
	21	assert _parse_number_factory(0, '', '')(float('nan')) == ('', float('-inf'))
	22	assert _parse_number_factory(ns.NANLAST, '', '')(float('nan')) == ('', float('+inf'))
	23
	24
	25	@given(floats(allow_nan=False) \| integers())
	26	def test_parse_number_factory_makes_function_that_returns_tuple(x):
	27	assert _parse_number_factory(0, '', '')(x) == ('', x)
	28
	29
	30	def test_parse_number_factory_with_PATH_makes_function_that_returns_nested_tuple_example():
	31	assert _parse_number_factory(ns.PATH, '', '')(57) == (('', 57),)
	32
	33
	34	@given(floats(allow_nan=False) \| integers())
	35	def test_parse_number_factory_with_PATH_makes_function_that_returns_nested_tuple(x):
	36	assert _parse_number_factory(ns.PATH, '', '')(x) == (('', x),)
	37
	38
	39	def test_parse_number_factory_with_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple_example():
	40	assert _parse_number_factory(ns.UNGROUPLETTERS \| ns.LOCALE, '', 'xx')(57) == (('xx',), ('', 57))
	41
	42
	43	@given(floats(allow_nan=False) \| integers())
	44	def test_parse_number_factory_with_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple(x):
	45	assert _parse_number_factory(ns.UNGROUPLETTERS \| ns.LOCALE, '', 'xx')(x) == (('xx',), ('', x))
	46
	47
	48	def test_parse_number_factory_with_PATH_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple_example():
	49	assert _parse_number_factory(ns.PATH \| ns.UNGROUPLETTERS \| ns.LOCALE, '', 'xx')(57) == ((('xx',), ('', 57)),)
	50
	51
	52	@given(floats(allow_nan=False) \| integers())
	53	def test_parse_number_factory_with_PATH_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple(x):
	54	assert _parse_number_factory(ns.PATH \| ns.UNGROUPLETTERS \| ns.LOCALE, '', 'xx')(x) == ((('xx',), ('', x)),)

+155

-0

test_natsort/test_parse_string_function.py less more

	0	# -*- coding: utf-8 -*-
	1	"""These test the utils.py functions."""
	2	from __future__ import unicode_literals
	3
	4	from pytest import raises
	5	from natsort.ns_enum import ns
	6	from natsort.utils import (
	7	_float_sign_exp_re,
	8	_float_nosign_exp_re,
	9	_float_sign_noexp_re,
	10	_float_nosign_noexp_re,
	11	_int_nosign_re,
	12	_int_sign_re,
	13	_parse_string_factory,
	14	_parse_path_factory,
	15	)
	16	from natsort.compat.py23 import py23_str, PY_VERSION
	17	from natsort.compat.fastnumbers import (
	18	fast_float,
	19	fast_int,
	20	)
	21	from slow_splitters import (
	22	int_splitter,
	23	float_splitter,
	24	)
	25	from hypothesis import (
	26	given,
	27	example,
	28	)
	29	from hypothesis.strategies import (
	30	lists,
	31	text,
	32	floats,
	33	integers,
	34	)
	35
	36	if PY_VERSION >= 3:
	37	long = int
	38
	39
	40	def whitespace_check(x):
	41	"""Simplifies testing"""
	42	try:
	43	if x.isspace():
	44	return x in ' \t\n\r\f\v'
	45	else:
	46	return True
	47	except (AttributeError, TypeError):
	48	return True
	49
	50
	51	def no_op(x):
	52	"""A function that does nothing."""
	53	return x
	54
	55
	56	def tuple2(x, dummy):
	57	"""Make the input a tuple."""
	58	return tuple(x)
	59
	60
	61	# Each test has an "example" version for demonstrative purposes,
	62	# and a test that uses the hypothesis module.
	63
	64
	65	def test_parse_string_factory_raises_TypeError_if_given_a_number_example():
	66	with raises(TypeError):
	67	assert _parse_string_factory(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)(50.0)
	68
	69
	70	@given(floats())
	71	def test_parse_string_factory_raises_TypeError_if_given_a_number(x):
	72	with raises(TypeError):
	73	assert _parse_string_factory(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)(x)
	74
	75
	76	def test_parse_string_factory_only_parses_digits_with_nosign_int_example():
	77	assert _parse_string_factory(0, '', _int_nosign_re.split, no_op, fast_int, tuple2)('a5+5.034e-1') == ('a', 5, '+', 5, '.', 34, 'e-', 1)
	78
	79
	80	@given(lists(elements=floats() \| text().filter(whitespace_check) \| integers(), min_size=1, max_size=10))
	81	@example([10000000000000000000000000000000000000000000000000000000000000000000000000,
	82	100000000000000000000000000000000000000000000000000000000000000000000000000,
	83	100000000000000000000000000000000000000000000000000000000000000000000000000])
	84	def test_parse_string_factory_only_parses_digits_with_nosign_int(x):
	85	s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
	86	assert _parse_string_factory(0, '', _int_nosign_re.split, no_op, fast_int, tuple2)(s) == int_splitter(s, False, '')
	87
	88
	89	def test_parse_string_factory_parses_digit_with_sign_with_signed_int_example():
	90	assert _parse_string_factory(0, '', _int_sign_re.split, no_op, fast_int, tuple2)('a5+5.034e-1') == ('a', 5, '', 5, '.', 34, 'e', -1)
	91
	92
	93	@given(lists(elements=floats() \| text().filter(whitespace_check) \| integers(), min_size=1, max_size=10))
	94	def test_parse_string_factory_parses_digit_with_sign_with_signed_int(x):
	95	s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
	96	assert _parse_string_factory(0, '', _int_sign_re.split, no_op, fast_int, tuple2)(s) == int_splitter(s, True, '')
	97
	98
	99	def test_parse_string_factory_only_parses_float_with_nosign_noexp_float_example():
	100	assert _parse_string_factory(0, '', _float_nosign_noexp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '+', 5.034, 'e-', 1.0)
	101
	102
	103	@given(lists(elements=floats(allow_nan=False) \| text().filter(whitespace_check) \| integers(), min_size=1, max_size=10))
	104	def test_parse_string_factory_only_parses_float_with_nosign_noexp_float(x):
	105	s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
	106	assert _parse_string_factory(0, '', _float_nosign_noexp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, False, False, '')
	107
	108
	109	def test_parse_string_factory_only_parses_float_with_exponent_with_nosign_exp_float_example():
	110	assert _parse_string_factory(0, '', _float_nosign_exp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '+', 0.5034)
	111
	112
	113	@given(lists(elements=floats(allow_nan=False) \| text().filter(whitespace_check) \| integers(), min_size=1, max_size=10))
	114	def test_parse_string_factory_only_parses_float_with_exponent_with_nosign_exp_float(x):
	115	s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
	116	assert _parse_string_factory(0, '', _float_nosign_exp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, False, True, '')
	117
	118
	119	def test_parse_string_factory_only_parses_float_with_sign_with_sign_noexp_float_example():
	120	assert _parse_string_factory(0, '', _float_sign_noexp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '', 5.034, 'e', -1.0)
	121
	122
	123	@given(lists(elements=floats(allow_nan=False) \| text().filter(whitespace_check) \| integers(), min_size=1, max_size=10))
	124	def test_parse_string_factory_only_parses_float_with_sign_with_sign_noexp_float(x):
	125	s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
	126	assert _parse_string_factory(0, '', _float_sign_noexp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, True, False, '')
	127
	128
	129	def test_parse_string_factory_parses_float_with_sign_exp_float_example():
	130	assert _parse_string_factory(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '', 0.5034)
	131	assert _parse_string_factory(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)('6a5+5.034e-1') == ('', 6.0, 'a', 5.0, '', 0.5034)
	132
	133
	134	@given(lists(elements=floats(allow_nan=False) \| text().filter(whitespace_check) \| integers(), min_size=1, max_size=10))
	135	def test_parse_string_factory_parses_float_with_sign_exp_float(x):
	136	s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
	137	assert _parse_string_factory(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, True, True, '')
	138
	139
	140	def test_parse_string_factory_selects_pre_function_value_if_not_dumb():
	141	def tuple2(x, orig):
	142	"""Make the input a tuple."""
	143	return (orig[0], tuple(x))
	144	assert _parse_string_factory(0, '', _int_nosign_re.split, py23_str.upper, fast_float, tuple2)('a5+5.034e-1') == ('A', ('A', 5, '+', 5, '.', 34, 'E-', 1))
	145	assert _parse_string_factory(ns._DUMB, '', _int_nosign_re.split, py23_str.upper, fast_float, tuple2)('a5+5.034e-1') == ('A', ('A', 5, '+', 5, '.', 34, 'E-', 1))
	146	assert _parse_string_factory(ns.LOCALE, '', _int_nosign_re.split, py23_str.upper, fast_float, tuple2)('a5+5.034e-1') == ('A', ('A', 5, '+', 5, '.', 34, 'E-', 1))
	147	assert _parse_string_factory(ns.LOCALE \| ns._DUMB, '', _int_nosign_re.split, py23_str.upper, fast_float, tuple2)('a5+5.034e-1') == ('a', ('A', 5, '+', 5, '.', 34, 'E-', 1))
	148
	149
	150	def test_parse_path_function_parses_string_as_path_then_as_string():
	151	splt = _parse_string_factory(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)
	152	assert _parse_path_factory(splt)('/p/Folder (10)/file34.5nm (2).tar.gz') == (('/',), ('p',), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',))
	153	assert _parse_path_factory(splt)('../Folder (10)/file (2).tar.gz') == (('..',), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',))
	154	assert _parse_path_factory(splt)('Folder (10)/file.f34.5nm (2).tar.gz') == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',))

+110

-0

test_natsort/test_string_component_transform_factory.py less more

	0	# -- coding: utf-8 --
	1	"""These test the utils.py functions."""
	2	from __future__ import unicode_literals
	3
	4	from natsort.ns_enum import ns
	5	from natsort.utils import (
	6	_string_component_transform_factory,
	7	_groupletters,
	8	)
	9	from natsort.compat.py23 import py23_str
	10	from natsort.compat.locale import get_strxfrm
	11	from natsort.compat.fastnumbers import (
	12	fast_float,
	13	fast_int,
	14	)
	15	from hypothesis import (
	16	given,
	17	)
	18	from hypothesis.strategies import (
	19	text,
	20	floats,
	21	integers,
	22	)
	23	from compat.locale import bad_uni_chars
	24
	25
	26	def no_null(x):
	27	return '\0' not in x
	28
	29
	30	# Each test has an "example" version for demonstrative purposes,
	31	# and a test that uses the hypothesis module.
	32
	33
	34	def test_string_component_transform_factory_returns_fast_int_example():
	35	x = 'hello'
	36	assert _string_component_transform_factory(0)(x) is fast_int(x)
	37	assert _string_component_transform_factory(0)('5007') == fast_int('5007')
	38
	39
	40	@given(text().filter(bool) \| floats() \| integers())
	41	def test_string_component_transform_factory_returns_fast_int(x):
	42	assert _string_component_transform_factory(0)(py23_str(x)) == fast_int(py23_str(x))
	43
	44
	45	def test_string_component_transform_factory_with_FLOAT_returns_fast_float_example():
	46	x = 'hello'
	47	assert _string_component_transform_factory(ns.FLOAT)(x) is fast_float(x)
	48	assert _string_component_transform_factory(ns.FLOAT)('5007') == fast_float('5007')
	49
	50
	51	@given(text().filter(bool) \| floats() \| integers())
	52	def test_string_component_transform_factory_with_FLOAT_returns_fast_float(x):
	53	assert _string_component_transform_factory(ns.FLOAT)(py23_str(x)) == fast_float(py23_str(x), nan=float('-inf'))
	54
	55
	56	def test_string_component_transform_factory_with_FLOAT_returns_fast_float_with_neg_inf_replacing_nan():
	57	assert _string_component_transform_factory(ns.FLOAT)('nan') == fast_float('nan', nan=float('-inf'))
	58
	59
	60	def test_string_component_transform_factory_with_FLOAT_and_NANLAST_returns_fast_float_with_pos_inf_replacing_nan():
	61	assert _string_component_transform_factory(ns.FLOAT \| ns.NANLAST)('nan') == fast_float('nan', nan=float('+inf'))
	62
	63
	64	def test_string_component_transform_factory_with_GROUPLETTERS_returns_fast_int_and_groupletters_example():
	65	x = 'hello'
	66	assert _string_component_transform_factory(ns.GROUPLETTERS)(x) == fast_int(x, key=_groupletters)
	67
	68
	69	@given(text().filter(bool))
	70	def test_string_component_transform_factory_with_GROUPLETTERS_returns_fast_int_and_groupletters(x):
	71	assert _string_component_transform_factory(ns.GROUPLETTERS)(x) == fast_int(x, key=_groupletters)
	72
	73
	74	def test_string_component_transform_factory_with_LOCALE_returns_fast_int_and_groupletters_example():
	75	x = 'hello'
	76	assert _string_component_transform_factory(ns.LOCALE)(x) == fast_int(x, key=get_strxfrm())
	77
	78
	79	@given(text().filter(bool).filter(lambda x: not any(y in bad_uni_chars for y in x)).filter(no_null))
	80	def test_string_component_transform_factory_with_LOCALE_returns_fast_int_and_groupletters(x):
	81	assert _string_component_transform_factory(ns.LOCALE)(x) == fast_int(x, key=get_strxfrm())
	82
	83
	84	def test_string_component_transform_factory_with_LOCALE_and_GROUPLETTERS_returns_fast_int_and_groupletters_and_locale_convert_example():
	85	x = 'hello'
	86	assert _string_component_transform_factory(ns.GROUPLETTERS \| ns.LOCALE)(x) == fast_int(x, key=lambda x: get_strxfrm()(_groupletters(x)))
	87
	88
	89	@given(text().filter(bool).filter(no_null))
	90	def test_string_component_transform_factory_with_LOCALE_and_GROUPLETTERS_returns_fast_int_and_groupletters_and_locale_convert(x):
	91	try:
	92	assert _string_component_transform_factory(ns.GROUPLETTERS \| ns.LOCALE)(x) == fast_int(x, key=lambda x: get_strxfrm()(_groupletters(x)))
	93	except ValueError as e: # handle broken locale lib on BSD.
	94	if 'is not in range' not in str(e):
	95	raise
	96
	97
	98	def test_string_component_transform_factory_with_LOCALE_and_DUMB_returns_fast_int_and_groupletters_and_locale_convert_example():
	99	x = 'hello'
	100	assert _string_component_transform_factory(ns._DUMB \| ns.LOCALE)(x) == fast_int(x, key=lambda x: get_strxfrm()(_groupletters(x)))
	101
	102
	103	@given(text().filter(bool).filter(no_null))
	104	def test_string_component_transform_factory_with_LOCALE_and_DUMB_returns_fast_int_and_groupletters_and_locale_convert(x):
	105	try:
	106	assert _string_component_transform_factory(ns._DUMB \| ns.LOCALE)(x) == fast_int(x, key=lambda x: get_strxfrm()(_groupletters(x)))
	107	except ValueError as e: # handle broken locale lib on BSD.
	108	if 'is not in range' not in str(e):
	109	raise

+29

-13

test_natsort/test_unicode_numbers.py less more

5	5	import unicodedata
6	6	from natsort.compat.py23 import py23_range, py23_unichr
7	7	from natsort.unicode_numbers import (
	8	numeric_hex,
8	9	numeric_chars,
9	10	numeric,
10	11	digit_chars,
11	12	digits,
	13	decimal_chars,
	14	decimals,
	15	digits_no_decimals,
	16	numeric_no_decimals,
12	17	)
13	18
14	19

22	27	assert unicodedata.digit(a, None) is not None
23	28
24	29
25		def test_numeric_chars_contains_all_valid_unicode_numeric_characters():
26		for i in py23_range(0X10FFFF):
	30	def test_decimal_chars_contains_only_valid_unicode_decimal_characters():
	31	for a in decimal_chars:
	32	assert unicodedata.decimal(a, None) is not None
	33
	34
	35	def test_numeric_chars_contains_all_valid_unicode_numeric_and_digit_characters():
	36	set_numeric_hex = set(numeric_hex)
	37	set_numeric_chars = set(numeric_chars)
	38	set_digit_chars = set(digit_chars)
	39	set_decimal_chars = set(decimal_chars)
	40	for i in py23_range(0X110000):
27	41	try:
28	42	a = py23_unichr(i)
29	43	except ValueError:

31	45	if a in set('0123456789'):
32	46	continue
33	47	if unicodedata.numeric(a, None) is not None:
34		assert a in numeric_chars
	48	assert i in set_numeric_hex
	49	assert a in set_numeric_chars
	50	if unicodedata.digit(a, None) is not None:
	51	assert i in set_numeric_hex
	52	assert a in set_digit_chars
	53	if unicodedata.decimal(a, None) is not None:
	54	assert i in set_numeric_hex
	55	assert a in set_decimal_chars
35	56
	57	assert set_decimal_chars.isdisjoint(digits_no_decimals)
	58	assert set_digit_chars.issuperset(digits_no_decimals)
36	59
37		def test_digit_chars_contains_all_valid_unicode_digit_characters():
38		for i in py23_range(0X10FFFF):
39		try:
40		a = py23_unichr(i)
41		except ValueError:
42		break
43		if a in set('0123456789'):
44		continue
45		if unicodedata.digit(a, None) is not None:
46		assert a in digit_chars
	60	assert set_decimal_chars.isdisjoint(numeric_no_decimals)
	61	assert set_numeric_chars.issuperset(numeric_no_decimals)
47	62
48	63
def test_combined_string_contains_all_characters_in_list():
    """Each combined string equals the concatenation of its character list."""
    pairs = (
        (numeric, numeric_chars),
        (digits, digit_chars),
        (decimals, decimal_chars),
    )
    for combined, chars in pairs:
        assert combined == ''.join(chars)

+128

-608

test_natsort/test_utils.py less more

1	1	"""These test the utils.py functions."""
2	2	from __future__ import unicode_literals
3	3
4		import sys
5		import locale
6	4	import pathlib
7		import pytest
8	5	import string
9		from math import isnan
10		from operator import itemgetter
11	6	from itertools import chain
	7	from operator import neg as op_neg
12	8	from pytest import raises
13	9	from natsort.ns_enum import ns
14	10	from natsort.utils import (
15		_number_extracter,
16		_py3_safe,
17		_natsort_key,
	11	_sep_inserter,
18	12	_args_to_enum,
	13	_regex_chooser,
19	14	_float_sign_exp_re,
20	15	_float_nosign_exp_re,
21	16	_float_sign_noexp_re,

24	19	_int_sign_re,
25	20	_do_decoding,
26	21	_path_splitter,
27		_fix_nan,
28		)
29		from natsort.locale_help import locale_convert
30		from natsort.compat.py23 import py23_str
31		from natsort.compat.locale import (
32		use_pyicu,
33		null_string,
34		dumb_sort,
35		)
36		from natsort.compat.fastnumbers import (
37		fast_float,
38		fast_int,
39		isint,
40		)
	22	_groupletters,
	23	chain_functions,
	24	)
	25	from natsort.compat.py23 import py23_str, py23_cmp
	26	from natsort.compat.locale import null_string_locale
41	27	from slow_splitters import (
42		int_splitter,
43		float_splitter,
44	28	sep_inserter,
45		)
46		from compat.locale import (
47		load_locale,
48		get_strxfrm,
49		low,
50		)
51		from compat.hypothesis import (
52		assume,
	29	add_leading_space_if_first_is_num,
	30	)
	31	from compat.locale import low
	32	from hypothesis import (
53	33	given,
54		example,
	34	)
	35	from hypothesis.strategies import (
55	36	sampled_from,
56		use_hypothesis,
57		)
58
59		if sys.version[0] == '3':
60		long = int
61
62		ichain = chain.from_iterable
	37	lists,
	38	text,
	39	integers,
	40	)
63	41
64	42
65	43	def test_do_decoding_decodes_bytes_string_to_unicode():

129	107	assert _args_to_enum(**{'number_type': None,
130	108	'exp': True}) == ns.I \| ns.U
131	109
132		float_nosafe_locale_group = (fast_float, False, True, True)
133		float_nosafe_locale_nogroup = (fast_float, False, True, False)
134		float_safe_nolocale_nogroup = (fast_float, True, False, False)
135		float_nosafe_nolocale_group = (fast_float, False, False, True)
136		float_nosafe_nolocale_nogroup = (fast_float, False, False, False)
137		int_safe_locale_group = (fast_int, True, True, True)
138		int_safe_locale_nogroup = (fast_int, True, True, False)
139		int_safe_nolocale_group = (fast_int, True, False, True)
140		int_safe_nolocale_nogroup = (fast_int, True, False, False)
141		int_nosafe_locale_group = (fast_int, False, True, True)
142		int_nosafe_locale_nogroup = (fast_int, False, True, False)
143		int_nosafe_nolocale_group = (fast_int, False, False, True)
144		int_nosafe_nolocale_nogroup = (fast_int, False, False, False)
145
146
147		def test_fix_nan_converts_nan_to_negative_infinity_without_NANLAST():
148		assert _fix_nan((float('nan'),), 0) == (float('-inf'),)
149		assert _fix_nan(('a', 'b', float('nan')), 0) == ('a', 'b', float('-inf'))
150
151
152		def test_fix_nan_converts_nan_to_positive_infinity_with_NANLAST():
153		assert _fix_nan((float('nan'),), ns.NANLAST) == (float('+inf'),)
154		assert _fix_nan(('a', 'b', float('nan')), ns.NANLAST) == ('a', 'b', float('+inf'))
	110
	111	def test_regex_chooser_returns_correct_regular_expression_object():
	112	assert _regex_chooser[ns.INT] is _int_nosign_re
	113	assert _regex_chooser[ns.INT \| ns.NOEXP] is _int_nosign_re
	114	assert _regex_chooser[ns.INT \| ns.SIGNED] is _int_sign_re
	115	assert _regex_chooser[ns.INT \| ns.SIGNED \| ns.NOEXP] is _int_sign_re
	116	assert _regex_chooser[ns.FLOAT] is _float_nosign_exp_re
	117	assert _regex_chooser[ns.FLOAT \| ns.NOEXP] is _float_nosign_noexp_re
	118	assert _regex_chooser[ns.FLOAT \| ns.SIGNED] is _float_sign_exp_re
	119	assert _regex_chooser[ns.FLOAT \| ns.SIGNED \| ns.NOEXP] is _float_sign_noexp_re
	120
	121
	122	def test_ns_enum_values_have_are_as_expected():
	123	# Defaults
	124	assert ns.TYPESAFE == 0
	125	assert ns.INT == 0
	126	assert ns.VERSION == 0
	127	assert ns.DIGIT == 0
	128	assert ns.UNSIGNED == 0
	129
	130	# Aliases
	131	assert ns.TYPESAFE == ns.T
	132	assert ns.INT == ns.I
	133	assert ns.VERSION == ns.V
	134	assert ns.DIGIT == ns.D
	135	assert ns.UNSIGNED == ns.U
	136	assert ns.FLOAT == ns.F
	137	assert ns.SIGNED == ns.S
	138	assert ns.NOEXP == ns.N
	139	assert ns.PATH == ns.P
	140	assert ns.LOCALEALPHA == ns.LA
	141	assert ns.LOCALENUM == ns.LN
	142	assert ns.LOCALE == ns.L
	143	assert ns.IGNORECASE == ns.IC
	144	assert ns.LOWERCASEFIRST == ns.LF
	145	assert ns.GROUPLETTERS == ns.G
	146	assert ns.UNGROUPLETTERS == ns.UG
	147	assert ns.CAPITALFIRST == ns.C
	148	assert ns.UNGROUPLETTERS == ns.CAPITALFIRST
	149	assert ns.NANLAST == ns.NL
	150	assert ns.COMPATIBILITYNORMALIZE == ns.CN
	151	assert ns.NUMAFTER == ns.NA
	152
	153	# Convenience
	154	assert ns.LOCALE == ns.LOCALEALPHA \| ns.LOCALENUM
	155	assert ns.REAL == ns.FLOAT \| ns.SIGNED
	156	assert ns._NUMERIC_ONLY == ns.REAL \| ns.NOEXP
	157
	158
	159	def test_chain_functions_is_a_no_op_if_no_functions_are_given():
	160	x = 2345
	161	assert chain_functions([])(x) is x
	162
	163
	164	def test_chain_functions_does_one_function_if_one_function_is_given():
	165	x = '2345'
	166	assert chain_functions([len])(x) == 4
	167
	168
	169	def test_chain_functions_combines_functions_in_given_order():
	170	x = 2345
	171	assert chain_functions([str, len, op_neg])(x) == -len(str(x))
155	172
156	173
157	174	# Each test has an "example" version for demonstrative purposes,
158	175	# and a test that uses the hypothesis module.
159	176
160
161		def test_py3_safe_does_nothing_if_no_numbers_example():
162		assert _py3_safe(['a', 'b', 'c'], False, isint) == ['a', 'b', 'c']
163		assert _py3_safe(['a'], False, isint) == ['a']
164
165
166		def test_py3_safe_does_nothing_if_only_one_number_example():
167		assert _py3_safe(['a', 5], False, isint) == ['a', 5]
168
169
170		def test_py3_safe_inserts_empty_string_between_two_numbers_example():
171		assert _py3_safe([5, 9], False, isint) == [5, '', 9]
172
173
174		def test_py3_safe_with_use_locale_inserts_null_string_between_two_numbers_example():
175		assert _py3_safe([5, 9], True, isint) == [5, null_string, 9]
176
177
178		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
179		@given([py23_str, int])
180		def test_py3_safe_inserts_empty_string_between_two_numbers(x):
181		assume(bool(x))
182		assert _py3_safe(x, False, isint) == sep_inserter(x, (int, long), '')
	177	def test_groupletters_returns_letters_with_lowercase_transform_of_letter_example():
	178	assert _groupletters('HELLO') == 'hHeElLlLoO'
	179	assert _groupletters('hello') == 'hheelllloo'
	180
	181
	182	@given(text().filter(bool))
	183	def test_groupeletters_returns_letters_with_lowercase_transform_of_letter(x):
	184	assert _groupletters(x) == ''.join(chain.from_iterable([low(y), y] for y in x))
	185
	186
	187	def test_sep_inserter_does_nothing_if_no_numbers_example():
	188	assert list(_sep_inserter(iter(['a', 'b', 'c']), '')) == ['a', 'b', 'c']
	189	assert list(_sep_inserter(iter(['a']), '')) == ['a']
	190
	191
	192	def test_sep_inserter_does_nothing_if_only_one_number_example():
	193	assert list(_sep_inserter(iter(['a', 5]), '')) == ['a', 5]
	194
	195
	196	def test_sep_inserter_inserts_separator_string_between_two_numbers_example():
	197	assert list(_sep_inserter(iter([5, 9]), '')) == ['', 5, '', 9]
	198	assert list(_sep_inserter(iter([5, 9]), null_string_locale)) == [null_string_locale, 5, null_string_locale, 9]
	199
	200
	201	@given(lists(elements=text().filter(bool) \| integers()))
	202	def test_sep_inserter_inserts_separator_between_two_numbers(x):
	203	assert list(_sep_inserter(iter(x), '')) == list(add_leading_space_if_first_is_num(sep_inserter(x, ''), ''))
183	204
184	205
185	206	def test_path_splitter_splits_path_string_by_separator_example():
186	207	z = '/this/is/a/path'
187		assert _path_splitter(z) == list(pathlib.Path(z).parts)
188
189
190		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
191		@given([sampled_from(string.ascii_letters)])
	208	assert tuple(_path_splitter(z)) == tuple(pathlib.Path(z).parts)
	209	z = pathlib.Path('/this/is/a/path')
	210	assert tuple(_path_splitter(z)) == tuple(pathlib.Path(z).parts)
	211
	212
	213	@given(lists(sampled_from(string.ascii_letters), min_size=2).filter(all))
192	214	def test_path_splitter_splits_path_string_by_separator(x):
193		assume(len(x) > 1)
194		assume(all(x))
195	215	z = py23_str(pathlib.Path(*x))
196		assert _path_splitter(z) == list(pathlib.Path(z).parts)
	216	assert tuple(_path_splitter(z)) == tuple(pathlib.Path(z).parts)
197	217
198	218
199	219	def test_path_splitter_splits_path_string_by_separator_and_removes_extension_example():
200	220	z = '/this/is/a/path/file.exe'
201		y = list(pathlib.Path(z).parts)
202		assert _path_splitter(z) == y[:-1] + [pathlib.Path(z).stem] + [pathlib.Path(z).suffix]
203
204
205		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
206		@given([sampled_from(string.ascii_letters)])
	221	y = tuple(pathlib.Path(z).parts)
	222	assert tuple(_path_splitter(z)) == y[:-1] + (pathlib.Path(z).stem, pathlib.Path(z).suffix)
	223
	224
	225	@given(lists(sampled_from(string.ascii_letters), min_size=3).filter(all))
207	226	def test_path_splitter_splits_path_string_by_separator_and_removes_extension(x):
208		assume(len(x) > 2)
209		assume(all(x))
210	227	z = py23_str(pathlib.Path(*x[:-2])) + '.' + x[-1]
211		y = list(pathlib.Path(z).parts)
212		assert _path_splitter(z) == y[:-1] + [pathlib.Path(z).stem] + [pathlib.Path(z).suffix]
213
214
215		def test_number_extracter_raises_TypeError_if_given_a_number_example():
216		with raises(TypeError):
217		assert _number_extracter(50.0, _float_sign_exp_re, *float_nosafe_nolocale_nogroup)
218
219
220		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
221		@given(float)
222		def test_number_extracter_raises_TypeError_if_given_a_number(x):
223		with raises(TypeError):
224		assert _number_extracter(x, _float_sign_exp_re, *float_nosafe_nolocale_nogroup)
225
226
227		def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats_example():
228		assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, 0.5034]
229
230
231		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
232		@given([float, py23_str, int])
233		def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats(x):
234		assume(len(x) <= 10)
235		assume(not any(type(y) == float and isnan(y) for y in x))
236		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
237		assert _number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, True, True, False, '')
238
239
240		def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats_example():
241		assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, '+', 0.5034]
242
243
244		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
245		@given([float, py23_str, int])
246		def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats(x):
247		assume(len(x) <= 10)
248		assume(not any(type(y) == float and isnan(y) for y in x))
249		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
250		assert _number_extracter(s, _float_nosign_exp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, False, True, False, '')
251
252
253		def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats_example():
254		assert _number_extracter('a5+5.034e-1', _float_sign_noexp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, 5.034, 'e', -1.0]
255
256
257		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
258		@given([float, py23_str, int])
259		def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats(x):
260		assume(len(x) <= 10)
261		assume(not any(type(y) == float and isnan(y) for y in x))
262		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
263		assert _number_extracter(s, _float_sign_noexp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, True, False, False, '')
264
265
266		def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats_example():
267		assert _number_extracter('a5+5.034e-1', _float_nosign_noexp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, '+', 5.034, 'e-', 1.0]
268
269
270		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
271		@given([float, py23_str, int])
272		def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats(x):
273		assume(len(x) <= 10)
274		assume(not any(type(y) == float and isnan(y) for y in x))
275		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
276		assert _number_extracter(s, _float_nosign_noexp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, False, False, False, '')
277
278
279		def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints_example():
280		assert _number_extracter('a5+5.034e-1', _int_nosign_re, *int_nosafe_nolocale_nogroup) == ['a', 5, '+', 5, '.', 34, 'e-', 1]
281
282
283		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
284		@given([float, py23_str, int])
285		@example([10000000000000000000000000000000000000000000000000000000000000000000000000,
286		100000000000000000000000000000000000000000000000000000000000000000000000000,
287		100000000000000000000000000000000000000000000000000000000000000000000000000])
288		def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints(x):
289		assume(len(x) <= 10)
290		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
291		assert _number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup) == int_splitter(s, False, False, '')
292
293
294		def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints_example():
295		assert _number_extracter('a5+5.034e-1', _int_sign_re, *int_nosafe_nolocale_nogroup) == ['a', 5, 5, '.', 34, 'e', -1]
296
297
298		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
299		@given([float, py23_str, int])
300		def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints(x):
301		assume(len(x) <= 10)
302		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
303		assert _number_extracter(s, _int_sign_re, *int_nosafe_nolocale_nogroup) == int_splitter(s, True, False, '')
304
305
306		def test_number_extracter_inserts_empty_string_between_floats_for_py3safe_option_example():
307		assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *float_safe_nolocale_nogroup) == ['a', 5.0, '', 0.5034]
308
309
310		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
311		@given([float, py23_str, int])
312		def test_number_extracter_inserts_empty_string_between_floats_for_py3safe_option(x):
313		assume(len(x) <= 10)
314		assume(not any(type(y) == float and isnan(y) for y in x))
315		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
316		assert _number_extracter(s, _float_sign_exp_re, *float_safe_nolocale_nogroup) == float_splitter(s, True, True, True, '')
317
318
319		def test_number_extracter_inserts_empty_string_between_ints_for_py3safe_option_example():
320		assert _number_extracter('a5+5.034e-1', _int_sign_re, *int_safe_nolocale_nogroup) == ['a', 5, '', 5, '.', 34, 'e', -1]
321
322
323		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
324		@given([float, py23_str, int])
325		def test_number_extracter_inserts_empty_string_between_ints_for_py3safe_option(x):
326		assume(len(x) <= 10)
327		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
328		assert _number_extracter(s, _int_sign_re, *int_safe_nolocale_nogroup) == int_splitter(s, True, True, '')
329
330
331		def test_number_extracter_inserts_no_empty_string_py3safe_option_because_no_numbers_are_adjascent_example():
332		assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *float_safe_nolocale_nogroup) == ['a', 5.0, '+', 0.5034]
333
334
335		def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number_example():
336		assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *float_nosafe_nolocale_nogroup) == ['', 6.0, 'a', 5.0, 0.5034]
337
338
339		def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number_and_empty_string_between_numbers_for_py3safe_exmple():
340		assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *float_safe_nolocale_nogroup) == ['', 6.0, 'a', 5.0, '', 0.5034]
341
342
343		def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float_example():
344		assert _number_extracter('A5+5.034E-1', _float_sign_exp_re, *float_nosafe_nolocale_group) == ['aA', 5.0, 0.5034]
345
346
347		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
348		@given([float, py23_str, int])
349		def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float(x):
350		assume(len(x) <= 10)
351		assume(not any(type(y) == float and isnan(y) for y in x))
352		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
353		t = float_splitter(s, True, True, False, '')
354		t = [''.join([low(z) + z for z in y]) if type(y) != float else y for y in t]
355		assert _number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_group) == t
356
357
358		def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int_example():
359		assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nosafe_nolocale_group) == ['aA', 5, '++', 5, '..', 34, 'eE--', 1]
360
361
362		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
363		@given([float, py23_str, int])
364		def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int(x):
365		assume(len(x) <= 10)
366		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
367		t = int_splitter(s, False, False, '')
368		t = [''.join([low(z) + z for z in y]) if type(y) not in (int, long) else y for y in t]
369		assert _number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_group) == t
370
371
372		def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale_example():
373		load_locale('en_US')
374		strxfrm = get_strxfrm()
375		assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nosafe_locale_nogroup) == [strxfrm('A'), 5, strxfrm('+'), 5, strxfrm('.'), 34, strxfrm('E-'), 1]
376		locale.setlocale(locale.LC_NUMERIC, str(''))
377
378
379		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
380		@given([float, py23_str, int])
381		def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale(x):
382		assume(len(x) <= 10)
383		load_locale('en_US')
384		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
385		t = int_splitter(s, False, False, null_string)
386		try: # Account for locale bug on Python 3.2
387		t = [y if i == 0 and y is null_string else locale_convert(y, (fast_int, isint), False) for i, y in enumerate(t)]
388		assert _number_extracter(s, _int_nosign_re, *int_nosafe_locale_nogroup) == t
389		except OverflowError:
390		pass
391		locale.setlocale(locale.LC_NUMERIC, str(''))
392
393
394		def test_number_extracter_extracts_numbers_and_strxfrms_letter_doubled_strings_with_use_locale_and_groupletters_example():
395		load_locale('en_US')
396		strxfrm = get_strxfrm()
397		assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nosafe_locale_group) == [strxfrm('aA'), 5, strxfrm('++'), 5, strxfrm('..'), 34, strxfrm('eE--'), 1]
398		locale.setlocale(locale.LC_NUMERIC, str(''))
399
400
401		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
402		@given([float, py23_str, int])
403		def test_number_extracter_extracts_numbers_and_strxfrms_letter_doubled_strings_with_use_locale_and_groupletters(x):
404		assume(len(x) <= 10)
405		load_locale('en_US')
406		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
407		t = int_splitter(s, False, False, null_string)
408		try: # Account for locale bug on Python 3.2
409		t = [y if i == 0 and y is null_string else locale_convert(y, (fast_int, isint), True) for i, y in enumerate(t)]
410		assert _number_extracter(s, _int_nosign_re, *int_nosafe_locale_group) == t
411		except OverflowError:
412		pass
413		locale.setlocale(locale.LC_NUMERIC, str(''))
414
415
416		def test__natsort_key_with_nan_input_transforms_nan_to_negative_inf():
417		assert _natsort_key('nan', None, ns.FLOAT) == ('', float('-inf'))
418		assert _natsort_key(float('nan'), None, 0) == ('', float('-inf'))
419
420
421		def test__natsort_key_with_nan_input_and_NANLAST_transforms_nan_to_positive_inf():
422		assert _natsort_key('nan', None, ns.FLOAT \| ns.NANLAST) == ('', float('+inf'))
423		assert _natsort_key(float('nan'), None, ns.NANLAST) == ('', float('+inf'))
424		assert ns.NL == ns.NANLAST
425
426
427		# The remaining tests provide no examples, just hypothesis tests.
428		# They only confirm that _natsort_key uses the above building blocks.
429
430
431		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
432		@given([float, py23_str, int])
433		def test__natsort_key_with_float_and_signed_splits_input_into_string_and_signed_float_with_exponent(x):
434		assume(len(x) <= 10)
435		assume(not any(type(y) == float and isnan(y) for y in x))
436		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
437		assert ns.F == ns.FLOAT
438		assert ns.S == ns.SIGNED
439		assert _natsort_key(s, None, ns.F \| ns.S) == tuple(_number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_nogroup))
440
441
442		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
443		@given([float, py23_str, int])
444		def test__natsort_key_with_real_splits_input_into_string_and_signed_float_with_exponent(x):
445		assume(len(x) <= 10)
446		assume(not any(type(y) == float and isnan(y) for y in x))
447		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
448		assert ns.R == ns.F \| ns.S
449		assert _natsort_key(s, None, ns.R) == tuple(_number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_nogroup))
450
451
452		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
453		@given([float, py23_str, int])
454		def test__natsort_key_with_real_matches_signed_float(x):
455		assume(len(x) <= 10)
456		assume(not any(type(y) == float and isnan(y) for y in x))
457		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
458		assert _natsort_key(s, None, ns.R) == _natsort_key(s, None, ns.F \| ns.S)
459
460
461		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
462		@given([float, py23_str, int])
463		def test__natsort_key_with_float_and_signed_and_noexp_splits_input_into_string_and_signed_float_without_exponent(x):
464		assume(len(x) <= 10)
465		assume(not any(type(y) == float and isnan(y) for y in x))
466		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
467		assert ns.N == ns.NOEXP
468		assert _natsort_key(s, None, ns.F \| ns.S \| ns.N) == tuple(_number_extracter(s, _float_sign_noexp_re, *float_nosafe_nolocale_nogroup))
469
470
471		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
472		@given([float, py23_str, int])
473		def test__natsort_key_with_float_and_unsigned_splits_input_into_string_and_unsigned_float(x):
474		assume(len(x) <= 10)
475		assume(not any(type(y) == float and isnan(y) for y in x))
476		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
477		assert ns.U == ns.UNSIGNED
478		assert _natsort_key(s, None, ns.F \| ns.U) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_nolocale_nogroup))
479		# Default is unsigned search
480		assert _natsort_key(s, None, ns.F) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_nolocale_nogroup))
481
482
483		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
484		@given([float, py23_str, int])
485		def test__natsort_key_with_float_and_noexp_splits_input_into_string_and_unsigned_float_without_exponent(x):
486		assume(len(x) <= 10)
487		assume(not any(type(y) == float and isnan(y) for y in x))
488		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
489		assert _natsort_key(s, None, ns.F \| ns.N) == tuple(_number_extracter(s, _float_nosign_noexp_re, *float_nosafe_nolocale_nogroup))
490
491
492		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
493		@given([float, py23_str, int])
494		def test__natsort_key_with_int_splits_input_into_string_and_unsigned_int(x):
495		assume(len(x) <= 10)
496		assume(not any(type(y) == float and isnan(y) for y in x))
497		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
498		assert ns.I == ns.INT
499		assert _natsort_key(s, None, ns.INT) == tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup))
500		# Default is int search
501		assert _natsort_key(s, None, ns.NOEXP) == tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup))
502		# NOEXP is ignored for integers
503		assert _natsort_key(s, None, ns.I \| ns.NOEXP) == tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup))
504
505
506		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
507		@given([float, py23_str, int])
508		def test__natsort_key_with_int_splits_and_signed_input_into_string_and_signed_int(x):
509		assume(len(x) <= 10)
510		assume(not any(type(y) == float and isnan(y) for y in x))
511		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
512		assert _natsort_key(s, None, ns.INT \| ns.SIGNED) == tuple(_number_extracter(s, _int_sign_re, *int_nosafe_nolocale_nogroup))
513		assert _natsort_key(s, None, ns.SIGNED) == tuple(_number_extracter(s, _int_sign_re, *int_nosafe_nolocale_nogroup))
514
515
516		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
517		@given([float, py23_str, int])
518		def test__natsort_key_with_version_or_digit_matches_usigned_int(x):
519		assume(len(x) <= 10)
520		assume(not any(type(y) == float and isnan(y) for y in x))
521		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
522		assert _natsort_key(s, None, ns.VERSION) == _natsort_key(s, None, ns.INT \| ns.UNSIGNED)
523		assert _natsort_key(s, None, ns.DIGIT) == _natsort_key(s, None, ns.VERSION)
524
525
526		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
527		@given([float, py23_str, int])
528		def test__natsort_key_with_key_applies_key_function_before_splitting(x):
529		assume(len(x) <= 10)
530		assume(not any(type(y) == float and isnan(y) for y in x))
531		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
532		assert _natsort_key(s, lambda x: x.upper(), ns.I) == tuple(_number_extracter(s.upper(), _int_nosign_re, *int_nosafe_nolocale_nogroup))
533
534
535		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
536		@given([float, py23_str, int])
537		def test__natsort_key_with_tuple_input_returns_nested_tuples(x):
538		# Iterables are parsed recursively so you can sort lists of lists.
539		assume(len(x) <= 10)
540		assume(not any(type(y) == float and isnan(y) for y in x))
541		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
542		t = tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup))
543		assert _natsort_key((s, s), None, ns.I) == (t, t)
544
545
546		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
547		@given([float, py23_str, int])
548		def test__natsort_key_with_tuple_input_but_itemgetter_key_returns_split_second_element(x):
549		# A key is applied before recursion, but not in the recursive calls.
550		assume(len(x) <= 10)
551		assume(not any(type(y) == float and isnan(y) for y in x))
552		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
553		t = tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup))
554		assert _natsort_key((s, s), itemgetter(1), ns.I) == t
555
556
557		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
558		@given(float)
559		def test__natsort_key_with_numeric_input_returns_number_with_leading_empty_string(x):
560		assume(not isnan(x))
561		if x.is_integer():
562		x = int(x)
563		assert _natsort_key(x, None, ns.I) == ('', x)
564
565
566		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
567		@given([float, py23_str, int])
568		def test__natsort_key_with_TYPESAFE_inserts_spaces_between_numbers(x):
569		# Turn on TYPESAFE to put a '' between adjacent numbers
570		assume(len(x) <= 10)
571		assume(not any(type(y) == float and isnan(y) for y in x))
572		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
573		assert _natsort_key(s, None, ns.TYPESAFE \| ns.S) == tuple(_number_extracter(s, _int_sign_re, *int_safe_nolocale_nogroup))
574
575
576		def test__natsort_key_with_invalid_alg_input_raises_ValueError():
577		# Invalid arguments give the correct response
578		with raises(ValueError) as err:
579		_natsort_key('a', None, '1')
580		assert str(err.value) == "_natsort_key: 'alg' argument must be from the enum 'ns', got 1"
581
582
583		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
584		@given([float, py23_str, int])
585		def test__natsort_key_with_IGNORECASE_lowercases_text(x):
586		assume(len(x) <= 10)
587		assume(not any(type(y) == float and isnan(y) for y in x))
588		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
589		try:
590		assert _natsort_key(s, None, ns.IGNORECASE) == tuple(_number_extracter(s.casefold(), _int_nosign_re, *int_nosafe_nolocale_nogroup))
591		except AttributeError:
592		assert _natsort_key(s, None, ns.IGNORECASE) == tuple(_number_extracter(s.lower(), _int_nosign_re, *int_nosafe_nolocale_nogroup))
593
594
595		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
596		@given([float, py23_str, int])
597		def test__natsort_key_with_LOWERCASEFIRST_inverts_text_case(x):
598		assume(len(x) <= 10)
599		assume(not any(type(y) == float and isnan(y) for y in x))
600		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
601		assert _natsort_key(s, None, ns.LOWERCASEFIRST) == tuple(_number_extracter(s.swapcase(), _int_nosign_re, *int_nosafe_nolocale_nogroup))
602
603
604		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
605		@given([float, py23_str, int])
606		def test__natsort_key_with_GROUPLETTERS_doubles_text_with_lowercase_letter_first(x):
607		assume(len(x) <= 10)
608		assume(not any(type(y) == float and isnan(y) for y in x))
609		s = ''.join(ichain([repr(y)] if type(y) in (float, long, int) else [low(y), y] for y in x))
610		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
611		t = _number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)
612		assert _natsort_key(s, None, ns.GROUPLETTERS) == tuple(''.join(low(z) + z for z in y) if type(y) not in (float, long, int) else y for y in t)
613
614
615		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
616		@given([float, py23_str, int])
617		def test__natsort_key_with_GROUPLETTERS_and_LOWERCASEFIRST_inverts_text_first_then_doubles_letters_with_lowercase_letter_first(x):
618		assume(len(x) <= 10)
619		assume(not any(type(y) == float and isnan(y) for y in x))
620		s = ''.join(ichain([repr(y)] if type(y) in (float, long, int) else [low(y), y] for y in x))
621		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
622		t = _number_extracter(s.swapcase(), _int_nosign_re, *int_nosafe_nolocale_nogroup)
623		assert _natsort_key(s, None, ns.G \| ns.LF) == tuple(''.join(low(z) + z for z in y) if type(y) not in (float, long, int) else y for y in t)
624
625
626		def test__natsort_key_with_bytes_input_only_applies_LOWERCASEFIRST_or_IGNORECASE_and_returns_in_tuple():
627		if sys.version[0] == '3':
628		assert _natsort_key(b'Apple56', None, ns.I) == (b'Apple56',)
629		assert _natsort_key(b'Apple56', None, ns.LF) == (b'aPPLE56',)
630		assert _natsort_key(b'Apple56', None, ns.IC) == (b'apple56',)
631		assert _natsort_key(b'Apple56', None, ns.G) == (b'Apple56',)
632		else:
633		assert True
634
635
636		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
637		@given([float, py23_str, int])
638		def test__natsort_key_with_LOCALE_transforms_floats_according_to_the_current_locale_and_strxfrms_strings(x):
639		# Locale aware sorting
640		assume(len(x) <= 10)
641		assume(not any(type(y) == float and isnan(y) for y in x))
642		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
643		load_locale('en_US')
644		if dumb_sort():
645		assert _natsort_key(s, None, ns.LOCALE \| ns.F) == tuple(_number_extracter(s.swapcase(), _float_nosign_exp_re, *float_nosafe_locale_group))
646		else:
647		assert _natsort_key(s, None, ns.LOCALE \| ns.F) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_locale_nogroup))
648		locale.setlocale(locale.LC_NUMERIC, str(''))
649
650
651		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
652		@given([float, py23_str, int])
653		def test__natsort_key_with_LOCALE_and_UNGROUPLETTERS_places_space_before_string_with_capital_first_letter(x):
654		# Locale aware sorting
655		assume(len(x) <= 10)
656		assume(not any(type(y) == float and isnan(y) for y in x))
657		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
658		load_locale('en_US')
659		if dumb_sort():
660		t = tuple(_number_extracter(s.swapcase(), _float_nosign_exp_re, *float_nosafe_locale_group))
661		else:
662		t = tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_locale_nogroup))
663		if not t:
664		r = (t, t)
665		elif t[0] is null_string:
666		r = ((b'' if use_pyicu else '',), t)
667		else:
668		r = ((s[0],), t)
669		assert _natsort_key(s, None, ns.LOCALE \| ns.UNGROUPLETTERS \| ns.F) == r
670		# The below are all aliases for UNGROUPLETTERS
671		assert ns.UNGROUPLETTERS == ns.UG
672		assert ns.UNGROUPLETTERS == ns.CAPITALFIRST
673		assert ns.UNGROUPLETTERS == ns.C
674		locale.setlocale(locale.LC_NUMERIC, str(''))
675
676
677		@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
678		@given([float, py23_str, int])
679		def test__natsort_key_with_UNGROUPLETTERS_does_nothing_without_LOCALE(x):
680		assume(len(x) <= 10)
681		assume(not any(type(y) == float and isnan(y) for y in x))
682		s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x)
683		assert _natsort_key(s, None, ns.UG \| ns.I) == _natsort_key(s, None, ns.I)
684
685
686		# It is difficult to generate code that will create random filesystem paths,
687		# so "example" based tests are given for the PATH option.
688
689
690		def test__natsort_key_with_absolute_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_root_and_split_extensions():
691		# Turn on PATH to split a file path into components
692		assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', None, ns.PATH \| ns.F) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',))
693
694
695		def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_relative_parent_and_split_extensions():
696		assert _natsort_key('../Folder (10)/file (2).tar.gz', None, ns.PATH \| ns.F) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',))
697
698
699		def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_and_split_extensions():
700		assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', None, ns.PATH \| ns.F) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',))
701
702
703		def test__natsort_key_with_pathlib_intput_and_PATH_returns_nested_tuples():
704		# Converts pathlib PurePath (and subclass) objects to string before sorting
705		assert _natsort_key(pathlib.Path('../Folder (10)/file (2).tar.gz'), None, ns.PATH \| ns.F) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',))
706
707
708		def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple():
709		# It gracefully handles as_path for numeric input by putting an extra tuple around it
710		# so it will sort against the other as_path results.
711		assert _natsort_key(10, None, ns.PATH) == (('', 10),)
712
713
714		def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple():
715		# PATH also handles recursion well.
716		assert _natsort_key(('/Folder', '/Folder (1)'), None, ns.PATH) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1, ')')))
	228	y = tuple(pathlib.Path(z).parts)
	229	assert tuple(_path_splitter(z)) == y[:-1] + (pathlib.Path(z).stem, pathlib.Path(z).suffix)
	230
	231
	232	@given(integers())
	233	def test_py23_cmp(x):
	234	assert py23_cmp(x, x) == 0
	235	assert py23_cmp(x, x + 1) < 0
	236	assert py23_cmp(x, x - 1) > 0

+40

-3

tox.ini less more

4	4
5	5	[tox]
6	6	envlist =
7		py26, py27, py32, py33, py34, pypy
	7	py27, py34, py35, py36, py37, pypy
	8	# Other valid environments are:
	9	# docs
	10	# release
	11
	12	# Don't error out if a user hasn't installed all python versions.
	13	skip_missing_interpreters =
	14	true
8	15
9	16	[testenv]
10		commands = {envpython} setup.py test
11		deps = pytest
	17	passenv =
	18	WITH_EXTRAS
	19	deps =
	20	pipenv
	21	extras =
	22	{env:WITH_EXTRAS:}
	23	commands =
	24	pipenv install --dev --skip-lock
	25	# Only run How It Works doctest on Python 3.6.
	26	py36: {envpython} -m doctest -o IGNORE_EXCEPTION_DETAIL docs/source/howitworks.rst
	27	# Other doctests are run for all pythons.
	28	pytest README.rst docs/source/intro.rst docs/source/examples.rst
	29	pytest --doctest-modules {envsitepackagesdir}/natsort
	30	# Full test suite. Allow the user to pass command-line arguments.
	31	pytest --flakes --pep8 --tb=short --cov {envsitepackagesdir}/natsort --cov-report term-missing {posargs:}
	32
	33	# Build documentation.
	34	[testenv:docs]
	35	deps =
	36	sphinx
	37	sphinx_rtd_theme
	38	commands =
	39	{envpython} setup.py build_sphinx
	40
	41	[testenv:release]
	42	deps =
	43	twine
	44	check-manifest
	45	commands =
	46	check-manifest
	47	{envpython} setup.py sdist bdist_wheel
	48	twine upload dist/*