Imported Upstream version 4.0.3
Agustin Henze
8 years ago
1 | 1 | |
2 | 2 | # Packages |
3 | 3 | *.egg |
4 | *.eggs | |
4 | 5 | *.egg-info |
5 | 6 | dist |
6 | 7 | build |
19 | 20 | pip-log.txt |
20 | 21 | |
21 | 22 | # Unit test / coverage reports |
23 | .hypothesis | |
22 | 24 | .coverage |
23 | 25 | .tox |
24 | 26 | .cache |
27 | .pytest | |
25 | 28 | |
26 | 29 | #Translations |
27 | 30 | *.mo |
3 | 3 | |
4 | 4 | # Packages |
5 | 5 | *.egg |
6 | *.eggs | |
6 | 7 | *.egg-info |
7 | 8 | dist |
8 | 9 | build |
21 | 22 | pip-log.txt |
22 | 23 | |
23 | 24 | # Unit test / coverage reports |
25 | .hypothesis | |
24 | 26 | .coverage |
25 | 27 | .tox |
26 | 28 | .cache |
29 | .pytest | |
27 | 30 | |
28 | 31 | #Translations |
29 | 32 | *.mo |
12 | 12 | - sudo locale-gen de_DE.UTF-8 |
13 | 13 | - sudo apt-get install bc |
14 | 14 | install: |
15 | - pip install -U pip | |
15 | 16 | - if [[ $WITH_OPTIONS == true ]]; then sudo apt-get install libicu-dev; fi |
16 | 17 | - if [[ $WITH_OPTIONS == true ]]; then pip install fastnumbers; fi |
17 | 18 | - if [[ $WITH_OPTIONS == true ]]; then pip install PyICU; fi |
18 | - if [[ $WITH_OPTIONS == true && 1 -eq $(echo "$TRAVIS_PYTHON_VERSION < 3.4" | bc -l) ]]; then pip install pathlib; fi | |
19 | - if [[ 1 -eq $(echo "$TRAVIS_PYTHON_VERSION < 3.4" | bc -l) ]]; then pip install pathlib; fi | |
19 | 20 | - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install argparse; fi |
20 | 21 | - if [[ $(echo "$TRAVIS_PYTHON_VERSION < 3.3" | bc -l) ]]; then pip install mock; fi |
21 | - pip install pytest-cov pytest-flakes pytest-pep8 | |
22 | - pip install pytest-cov pytest-flakes pytest-pep8 hypothesis | |
22 | 23 | - pip install coveralls |
23 | 24 | script: |
24 | 25 | - python -m pytest --cov natsort --flakes --pep8 |
3 | 3 | include natsort/_version.py |
4 | 4 | include natsort/__main__.py |
5 | 5 | include natsort/__init__.py |
6 | include natsort/py23compat.py | |
7 | 6 | include natsort/locale_help.py |
8 | include natsort/fake_fastnumbers.py | |
9 | 7 | include natsort/utils.py |
8 | include natsort/ns_enum.py | |
9 | include natsort/unicode_numbers.py | |
10 | include natsort/compat/__init__.py | |
11 | include natsort/compat/py23.py | |
12 | include natsort/compat/fake_fastnumbers.py | |
13 | include natsort/compat/fastnumbers.py | |
14 | include natsort/compat/locale.py | |
15 | include natsort/compat/pathlib.py | |
16 | include natsort/compat/pathlib.py | |
10 | 17 | include test_natsort/profile_natsorted.py |
11 | 18 | include test_natsort/stress_natsort.py |
19 | include test_natsort/slow_splitters.py | |
12 | 20 | include test_natsort/test_natsort.py |
13 | 21 | include test_natsort/test_locale_help.py |
14 | 22 | include test_natsort/test_fake_fastnumbers.py |
15 | 23 | include test_natsort/test_main.py |
16 | 24 | include test_natsort/test_utils.py |
25 | include test_natsort/test_unicode_numbers.py | |
26 | include test_natsort/compat/__init__.py | |
27 | include test_natsort/compat/hypothesis.py | |
28 | include test_natsort/compat/locale.py | |
29 | include test_natsort/compat/mock.py | |
17 | 30 | include setup.py |
18 | 31 | include setup.cfg |
19 | 32 | prune natsort/__pycache__ |
10 | 10 | |
11 | 11 | - Source Code: https://github.com/SethMMorton/natsort |
12 | 12 | - Downloads: https://pypi.python.org/pypi/natsort |
13 | - Documentation: http://pythonhosted.org/natsort/ | |
14 | ||
15 | Please see `Deprecation Notices`_ for an `important` backwards incompatibility notice | |
16 | for ``natsort`` version 4.0.0. | |
13 | - Documentation: http://pythonhosted.org/natsort | |
14 | ||
15 | Please see `Moving from older Natsort versions`_ to see if this update requires | |
16 | you to modify your ``natsort`` calls in your code (99% of users will not). | |
17 | 17 | |
18 | 18 | Quick Description |
19 | 19 | ----------------- |
46 | 46 | ``natsorted`` identifies real numbers anywhere in a string and sorts them |
47 | 47 | naturally. |
48 | 48 | |
49 | Sorting version numbers is just as easy with the ``versorted`` function: | |
50 | ||
51 | .. code-block:: python | |
52 | ||
53 | >>> from natsort import versorted | |
49 | Sorting versions is handled properly by default (as of ``natsort`` version >= 4.0.0): | |
50 | ||
51 | .. code-block:: python | |
52 | ||
54 | 53 | >>> a = ['version-1.9', 'version-2.0', 'version-1.11', 'version-1.10'] |
55 | >>> versorted(a) | |
54 | >>> natsorted(a) | |
56 | 55 | ['version-1.9', 'version-1.10', 'version-1.11', 'version-2.0'] |
57 | >>> natsorted(a) # natsorted tries to sort as signed floats, so it won't work | |
58 | ['version-2.0', 'version-1.9', 'version-1.11', 'version-1.10'] | |
56 | ||
57 | If you need to sort release candidates, please see | |
58 | `this useful hack <http://pythonhosted.org//natsort/examples.html#rc-sorting>`_ . | |
59 | 59 | |
60 | 60 | You can also perform locale-aware sorting (or "human sorting"), where the |
61 | 61 | non-numeric characters are ordered based on their meaning, not on their |
79 | 79 | and the `Optional Dependencies`_ section |
80 | 80 | below before using the ``humansorted`` function, *especially* if you are on a |
81 | 81 | BSD-based system (like Mac OS X). |
82 | ||
83 | You can sort signed floats (i.e. real numbers) using the ``realsorted`` function; this is | |
84 | useful in scientific data analysis. This was the default behavior of ``natsorted`` | |
85 | for ``natsort`` version < 4.0.0: | |
86 | ||
87 | .. code-block:: python | |
88 | ||
89 | >>> from natsort import realsorted | |
90 | >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] | |
91 | >>> natsorted(a) | |
92 | ['num2', 'num5.3', 'num5.10', 'num-3'] | |
93 | >>> realsorted(a) | |
94 | ['num-3', 'num2', 'num5.10', 'num5.3'] | |
82 | 95 | |
83 | 96 | You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types |
84 | 97 | when you sort: |
128 | 141 | Requirements |
129 | 142 | ------------ |
130 | 143 | |
131 | ``natsort`` requires python version 2.6 or greater | |
132 | (this includes python 3.x). To run version 2.6, 3.0, or 3.1 the | |
133 | `argparse <https://pypi.python.org/pypi/argparse>`_ module is required. | |
144 | ``natsort`` requires Python version 2.7 or greater or Python 3.2 or greater. | |
134 | 145 | |
135 | 146 | .. _optional: |
136 | 147 | |
152 | 163 | ''''' |
153 | 164 | |
154 | 165 | On BSD-based systems (this includes Mac OS X), the underlying ``locale`` library |
155 | can be buggy (please see http://bugs.python.org/issue23195), so ``natsort`` will use | |
156 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ under the hood if it is installed | |
157 | on your computer; this will give more reliable cross-platform results. | |
158 | ``natsort`` will not require (or check) that | |
159 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed at installation | |
160 | since in Linux-based systems and Windows systems ``locale`` should work just fine. | |
161 | Please visit https://github.com/SethMMorton/natsort/issues/21 for more details and | |
162 | how to install on Mac OS X. | |
166 | can be buggy (please see http://bugs.python.org/issue23195); ``locale`` is | |
167 | used for the ``ns.LOCALE`` option and ``humansorted`` function. To remedy this, | |
168 | one can | |
169 | ||
170 | 1. Use "\*.ISO8859-1" locale (e.g. 'en_US.ISO8859-1') rather than "\*.UTF-8" | |
171 | locale. These locales do not suffer from as many problems as "UTF-8" | |
172 | and thus should give expected results. | |
173 | 2. Use `PyICU <https://pypi.python.org/pypi/PyICU>`_. If | |
174 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed, ``natsort`` | |
175 | will use it under the hood; this will give more | |
176 | reliable cross-platform results in the long run. ``natsort`` will not | |
177 | require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
178 | is installed at installation. Please visit | |
179 | https://github.com/SethMMorton/natsort/issues/21 for more details and | |
180 | how to install on Mac OS X. **Please note** that using | |
181 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ is the only way to | |
182 | guarantee correct results for all input on BSD-based systems, since | |
183 | every other suggestion is a workaround. | |
184 | 3. Do nothing. As of ``natsort`` version 4.0.0, ``natsort`` is configured | |
185 | to compensate for a broken ``locale`` library in terms of case-handling; | |
186 | if you do not need to be able to properly handle non-ASCII characters | |
187 | then this may be the best option for you. | |
188 | ||
189 | Note that the above solutions *should not* be required for Windows or | |
190 | Linux since in Linux-based systems and Windows systems ``locale`` *should* work | |
191 | just fine. | |
163 | 192 | |
164 | 193 | .. _deprecate: |
165 | 194 | |
166 | Deprecation Notices | |
167 | ------------------- | |
168 | ||
169 | - The default sorting algorithm for ``natsort`` will change in version 4.0.0 | |
170 | from signed floats (with exponents) to unsigned integers. The motivation | |
171 | for this change is that it will cause ``natsort`` to return results that | |
172 | pass the "least astonishment" test for the most common use case, which is | |
173 | sorting version numbers. If you currently rely on the default behavior | |
174 | to be signed floats, it is recommend that you add ``alg=ns.F`` to your | |
175 | ``natsort`` calls or switch to the new ``realsorted`` function which | |
176 | behaves identically to the current ``natsorted`` with default values. | |
177 | - In ``natsort`` version 4.0.0, the ``number_type``, ``signed``, ``exp``, | |
178 | ``as_path``, and ``py3_safe`` options will be removed from the (documented) | |
179 | API, in favor of the ``alg`` option and ``ns`` enum. They will remain as | |
180 | keyword-only arguments after that (for the foreseeable future). | |
181 | - In ``natsort`` version 4.0.0, the ``natsort_key`` function will be removed | |
182 | from the public API. All future development should use ``natsort_keygen`` | |
183 | in preparation for this. | |
195 | Moving from older Natsort versions | |
196 | ---------------------------------- | |
197 | ||
198 | - The default sorting algorithm for ``natsort`` has changed in version 4.0.0 | |
199 | from signed floats (with exponents) to unsigned integers. The motivation | |
200 | for this change is that it will cause ``natsort`` to return results that | |
201 | pass the "least astonishment" test for the most common use case, which is | |
202 | sorting version numbers. If you relied on the default behavior | |
203 | to be signed floats, add ``alg=ns.F | ns.S`` to your | |
204 | ``natsort`` calls or switch to the new ``realsorted`` function which | |
205 | behaves identically to the older ``natsorted`` with default values. | |
206 | For 99% of users this change will not affect their code... it is only | |
207 | expected that this will affect users using ``natsort`` for science and | |
208 | engineering. | |
209 | This will also affect the default behavior of the ``natsort`` shell script. | |
210 | - In ``natsort`` version 4.0.0, the ``number_type``, ``signed``, ``exp``, | |
211 | ``as_path``, and ``py3_safe`` options have been removed from the (documented) | |
212 | API in favor of the ``alg`` option and ``ns`` enum. | |
213 | - In ``natsort`` version 4.0.0, the ``natsort_key`` function has been removed | |
214 | from the public API. | |
184 | 215 | |
185 | 216 | Author |
186 | 217 | ------ |
193 | 224 | These are the last three entries of the changelog. See the package documentation |
194 | 225 | for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_. |
195 | 226 | |
196 | 04-06-2015 v. 3.5.6 | |
227 | 06-25-2015 v. 4.0.3 | |
197 | 228 | ''''''''''''''''''' |
198 | 229 | |
199 | - Added 'UNGROUPLETTERS' algorithm to get the case-grouping behavior of | |
200 | an ordinal sort when using 'LOCALE'. | |
201 | - Added convenience functions 'decoder', 'as_ascii', and 'as_utf8' for | |
202 | dealing with bytes types. | |
203 | ||
204 | 04-04-2015 v. 3.5.5 | |
230 | - Fixed bad install on last release (sorry guys!). | |
231 | ||
232 | 06-24-2015 v. 4.0.2 | |
205 | 233 | ''''''''''''''''''' |
206 | 234 | |
207 | - Added 'realsorted' and 'index_realsorted' functions for | |
208 | forward-compatibility with >= 4.0.0. | |
209 | - Made explanation of when to use "TYPESAFE" more clear in the docs. | |
210 | ||
211 | 04-02-2015 v. 3.5.4 | |
235 | - Added back Python 2.6 and Python 3.2 compatibility. Unit testing is now | |
236 | performed for these versions. | |
237 | - Consolidated under-the-hood compatibility functionality. | |
238 | ||
239 | 06-04-2015 v. 4.0.1 | |
212 | 240 | ''''''''''''''''''' |
213 | 241 | |
214 | - Fixed bug where a 'TypeError' was raised if a string containing a leading | |
215 | number was sorted with alpha-only strings when 'LOCALE' is used. | |
242 | - Added support for sorting NaN by internally converting to -Infinity | |
243 | or +Infinity |
9 | 9 | :maxdepth: 2 |
10 | 10 | |
11 | 11 | natsort_keygen.rst |
12 | natsort_key.rst | |
13 | 12 | natsorted.rst |
14 | 13 | versorted.rst |
15 | 14 | humansorted.rst |
1 | 1 | |
2 | 2 | Changelog |
3 | 3 | --------- |
4 | ||
5 | 06-25-2015 v. 4.0.3 | |
6 | ''''''''''''''''''' | |
7 | ||
8 | - Fixed bad install on last release (sorry guys!). | |
9 | ||
10 | 06-24-2015 v. 4.0.2 | |
11 | ''''''''''''''''''' | |
12 | ||
13 | - Added back Python 2.6 and Python 3.2 compatibility. Unit testing is now | |
14 | performed for these versions. | |
15 | - Consolidated under-the-hood compatibility functionality. | |
16 | ||
17 | 06-04-2015 v. 4.0.1 | |
18 | ''''''''''''''''''' | |
19 | ||
20 | - Added support for sorting NaN by internally converting to -Infinity | |
21 | or +Infinity | |
22 | ||
23 | 05-17-2015 v. 4.0.0 | |
24 | ''''''''''''''''''' | |
25 | ||
26 | - Made default behavior of 'natsort' search for unsigned ints, | |
27 | rather than signed floats. This is a backwards-incompatible | |
28 | change but in 99% of use cases it should not require any | |
29 | end-user changes. | |
30 | - Improved handling of locale-aware sorting on systems where the | |
31 | underlying locale library is broken. | |
32 | - Greatly improved all unit tests by adding the hypothesis library. | |
4 | 33 | |
5 | 34 | 04-06-2015 v. 3.5.6 |
6 | 35 | ''''''''''''''''''' |
14 | 14 | In the most basic use case, simply import :func:`~natsorted` and use |
15 | 15 | it as you would :func:`sorted`:: |
16 | 16 | |
17 | >>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300'] | |
17 | >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] | |
18 | 18 | >>> sorted(a) |
19 | ['a5.034e1', 'a50', 'a50.300', 'a50.4', 'a51.'] | |
19 | ['a1', 'a10', 'a2', 'a4', 'a9'] | |
20 | 20 | >>> from natsort import natsorted, ns |
21 | 21 | >>> natsorted(a) |
22 | ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.'] | |
22 | ['a1', 'a2', 'a4', 'a9', 'a10'] | |
23 | 23 | |
24 | 24 | Sort Version Numbers |
25 | 25 | -------------------- |
26 | 26 | |
27 | With default options, :func:`~natsorted` will not sort version numbers | |
28 | well. Version numbers are best sorted by searching for valid unsigned int | |
29 | literals, not floats. This can be achieved in three ways, as shown below:: | |
30 | ||
31 | >>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1'] | |
32 | >>> natsorted(a) # This gives incorrect results | |
33 | ['ver-2.9.9a', 'ver-2.9.9b', 'ver-1.11', 'ver-1.11.4', 'ver-1.10.1'] | |
34 | >>> natsorted(a, alg=ns.INT | ns.UNSIGNED) | |
35 | ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] | |
36 | >>> natsorted(a, alg=ns.VERSION) | |
37 | ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] | |
38 | >>> from natsort import versorted | |
39 | >>> versorted(a) | |
40 | ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] | |
41 | ||
42 | You can see that ``alg=ns.VERSION`` is a shortcut for | |
43 | ``alg=ns.INT | ns.UNSIGNED``, and the :func:`~versorted` is a shortcut for | |
44 | ``natsorted(alg=ns.VERSION)``. The recommend manner to sort version | |
45 | numbers is to use :func:`~versorted`. | |
27 | As of :mod:`natsort` version >= 4.0.0, :func:`~natsorted` will now properly | |
28 | sort version numbers. The old function :func:`~versorted` exists for | |
29 | backwards compatibility but new development should use :func:`~natsorted`. | |
30 | ||
31 | .. _rc_sorting: | |
46 | 32 | |
47 | 33 | Sorting with Alpha, Beta, and Release Candidates |
48 | 34 | ++++++++++++++++++++++++++++++++++++++++++++++++ |
51 | 37 | scheme, you may not get the results you expect:: |
52 | 38 | |
53 | 39 | >>> a = ['1.2', '1.2rc1', '1.2beta2', '1.2beta1', '1.2alpha', '1.2.1', '1.1', '1.3'] |
54 | >>> versorted(a) | |
40 | >>> natsorted(a) | |
55 | 41 | ['1.1', '1.2', '1.2.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.3'] |
56 | 42 | |
57 | 43 | To make the '1.2' pre-releases come before '1.2.1', you need to use the following |
58 | 44 | recipe:: |
59 | 45 | |
60 | >>> versorted(a, key=lambda x: x.replace('.', '~')) | |
46 | >>> natsorted(a, key=lambda x: x.replace('.', '~')) | |
61 | 47 | ['1.1', '1.2', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2.1', '1.3'] |
62 | 48 | |
63 | 49 | If you also want '1.2' after all the alpha, beta, and rc candidates, you can |
64 | 50 | modify the above recipe:: |
65 | 51 | |
66 | >>> versorted(a, key=lambda x: x.replace('.', '~')+'z') | |
52 | >>> natsorted(a, key=lambda x: x.replace('.', '~')+'z') | |
67 | 53 | ['1.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2', '1.2.1', '1.3'] |
68 | 54 | |
69 | 55 | Please see `this issue <https://github.com/SethMMorton/natsort/issues/13>`_ to |
122 | 108 | using `PyICU <https://pypi.python.org/pypi/PyICU>`_; you can read about |
123 | 109 | them here: http://bugs.python.org/issue23195. |
124 | 110 | |
111 | If you have problems with ``ns.LOCALE`` (or :func:`~humansorted`), | |
112 | especially on BSD-based systems, you can try the following: | |
113 | ||
114 | 1. Use "\*.ISO8859-1" locale (e.g. 'en_US.ISO8859-1') rather than "\*.UTF-8" | |
115 | locale. These locales do not suffer from as many problems as "UTF-8" | |
116 | and thus should give expected results. | |
117 | 2. Use `PyICU <https://pypi.python.org/pypi/PyICU>`_. If | |
118 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed, ``natsort`` | |
119 | will use it under the hood; this will give more | |
120 | reliable cross-platform results in the long run. ``natsort`` will not | |
121 | require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
122 | is installed at installation. Please visit | |
123 | https://github.com/SethMMorton/natsort/issues/21 for more details and | |
124 | how to install on Mac OS X. **Please note** that using | |
125 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ is the only way to | |
126 | guarantee correct results for all input on BSD-based systems, since | |
127 | every other suggestion is a workaround. | |
128 | 3. Do nothing. As of ``natsort`` version 4.0.0, ``natsort`` is configured | |
129 | to compensate for a broken ``locale`` library in terms of case-handling; | |
130 | if you do not need to be able to properly handle non-ASCII characters | |
131 | then this may be the best option for you. | |
132 | ||
133 | Note that the above solutions *should not* be required for Windows or | |
134 | Linux since in Linux-based systems and Windows systems ``locale`` *should* work | |
135 | just fine. | |
136 | ||
125 | 137 | Controlling Case When Sorting |
126 | 138 | ----------------------------- |
127 | 139 | |
166 | 178 | Customizing Float Definition |
167 | 179 | ---------------------------- |
168 | 180 | |
169 | By default :func:`~natsorted` searches for any float that would be | |
181 | You can make :func:`~natsorted` search for any float that would be | |
170 | 182 | a valid Python float literal, such as 5, 0.4, -4.78, +4.2E-34, etc. |
171 | Perhaps you don't want to search for signed numbers, or you don't | |
172 | want to search for exponential notation, the ``ns.UNSIGNED`` and | |
173 | ``ns.NOEXP`` options allow you to do this:: | |
183 | using the ``ns.FLOAT`` key. You can disable the exponential component | |
184 | of the number with ``ns.NOEXP``. :: | |
174 | 185 | |
175 | 186 | >>> a = ['a50', 'a51.', 'a+50.4', 'a5.034e1', 'a+50.300'] |
176 | >>> natsorted(a) | |
187 | >>> natsorted(a, alg=ns.FLOAT) | |
188 | ['a50', 'a5.034e1', 'a51.', 'a+50.300', 'a+50.4'] | |
189 | >>> natsorted(a, alg=ns.FLOAT | ns.SIGNED) | |
177 | 190 | ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] |
178 | >>> natsorted(a, alg=ns.UNSIGNED) | |
179 | ['a50', 'a5.034e1', 'a51.', 'a+50.300', 'a+50.4'] | |
180 | >>> natsorted(a, alg=ns.NOEXP) | |
191 | >>> natsorted(a, alg=ns.FLOAT | ns.SIGNED | ns.NOEXP) | |
181 | 192 | ['a5.034e1', 'a50', 'a+50.300', 'a+50.4', 'a51.'] |
193 | ||
194 | For convenience, the ``ns.REAL`` option is provided which is a shortcut | |
195 | for ``ns.FLOAT | ns.SIGNED`` and can be used to sort on real numbers. | |
196 | This can be easily accessed with the :func:`~realsorted` convenience | |
197 | function. Please note that the behavior of the :func:`~realsorted` function | |
198 | was the default behavior of :func:`~natsorted` for :mod:`natsort` | |
199 | version < 4.0.0:: | |
200 | ||
201 | >>> natsorted(a, alg=ns.REAL) | |
202 | ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] | |
203 | >>> from natsort import realsorted | |
204 | >>> realsorted(a) | |
205 | ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] | |
182 | 206 | |
183 | 207 | Using a Custom Sorting Key |
184 | 208 | -------------------------- |
208 | 232 | |
209 | 233 | >>> from natsort import natsort_keygen |
210 | 234 | >>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300'] |
211 | >>> natsort_key = natsort_keygen() | |
235 | >>> natsort_key = natsort_keygen(alg=ns.FLOAT) | |
212 | 236 | >>> a.sort(key=natsort_key) |
213 | 237 | >>> a |
214 | 238 | ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.'] |
215 | >>> versort_key = natsort_keygen(alg=ns.VERSION) | |
216 | >>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1'] | |
217 | >>> a.sort(key=versort_key) | |
218 | >>> a | |
219 | ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] | |
220 | 239 | |
221 | 240 | :func:`~natsort_keygen` has the same API as :func:`~natsorted` (minus the |
222 | 241 | `reverse` option). |
226 | 245 | |
227 | 246 | Sometimes you have multiple lists, and you want to sort one of those |
228 | 247 | lists and reorder the other lists according to how the first was sorted. |
229 | To achieve this you would use the :func:`~index_natsorted` or | |
230 | :func:`~index_versorted` in combination with the convenience function | |
248 | To achieve this you could use the :func:`~index_natsorted` in combination | |
249 | with the convenience function | |
231 | 250 | :func:`~order_by_index`:: |
232 | 251 | |
233 | 252 | >>> from natsort import index_natsorted, order_by_index |
296 | 315 | >>> a = [b'a56', b'a5', b'a6', b'a40'] |
297 | 316 | >>> natsorted(a, key=decoder('latin1')) == [b'a5', b'a6', b'a40', b'a56'] |
298 | 317 | True |
318 | ||
319 | Sorting a Pandas DataFrame | |
320 | -------------------------- | |
321 | ||
322 | As of Pandas version 0.16.0, the sorting methods do not accept a ``key`` argument, | |
323 | so you cannot simply pass :func:`natsort_keygen` to a Pandas DataFrame and sort. | |
324 | This request has been made to the Pandas devs; see | |
325 | `issue 3942 <https://github.com/pydata/pandas/issues/3942>`_ if you are interested. | |
326 | If you need to sort a Pandas DataFrame, please check out | |
327 | `this answer on StackOverflow <http://stackoverflow.com/a/29582718/1399279>`_ | |
328 | for ways to do this without the ``key`` argument to ``sort``. |
46 | 46 | >>> natsorted(a) |
47 | 47 | ['a1', 'a2', 'a4', 'a9', 'a10'] |
48 | 48 | |
49 | :func:`~natsorted` identifies real numbers anywhere in a string and sorts them | |
49 | :func:`~natsorted` identifies numbers anywhere in a string and sorts them | |
50 | 50 | naturally. |
51 | 51 | |
52 | Sorting version numbers is just as easy with :func:`~versorted`:: | |
53 | ||
54 | >>> from natsort import versorted | |
52 | Sorting versions is handled properly by default (as of :mod:`natsort` version >= 4.0.0): | |
53 | ||
54 | .. code-block:: python | |
55 | ||
55 | 56 | >>> a = ['version-1.9', 'version-2.0', 'version-1.11', 'version-1.10'] |
56 | >>> versorted(a) | |
57 | >>> natsorted(a) | |
57 | 58 | ['version-1.9', 'version-1.10', 'version-1.11', 'version-2.0'] |
58 | >>> natsorted(a) # natsorted tries to sort as signed floats, so it won't work | |
59 | ['version-2.0', 'version-1.9', 'version-1.11', 'version-1.10'] | |
59 | ||
60 | If you need to sort release candidates, please see :ref:`rc_sorting` for | |
61 | a useful hack. | |
60 | 62 | |
61 | 63 | You can also perform locale-aware sorting (or "human sorting"), where the |
62 | 64 | non-numeric characters are ordered based on their meaning, not on their |
63 | ordinal value; this can be achieved with the ``humansorted`` function:: | |
65 | ordinal value; this can be achieved with the :func:`~humansorted` function: | |
66 | ||
67 | .. code-block:: python | |
64 | 68 | |
65 | 69 | >>> a = ['Apple', 'Banana', 'apple', 'banana'] |
66 | 70 | >>> natsorted(a) |
75 | 79 | You may find you need to explicitly set the locale to get this to work |
76 | 80 | (as shown in the example). |
77 | 81 | Please see :ref:`bug_note` and the Installation section |
78 | below before using the ``humansorted`` function. | |
82 | below before using the :func:`~humansorted` function. | |
83 | ||
84 | You can sort signed floats (i.e. real numbers) using :func:`~realsorted`; | |
85 | this is useful in scientific data analysis. This was the default behavior of | |
86 | :func:`~natsorted` for :mod:`natsort` version < 4.0.0: | |
87 | ||
88 | .. code-block:: python | |
89 | ||
90 | >>> from natsort import realsorted | |
91 | >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] | |
92 | >>> natsorted(a) | |
93 | ['num2', 'num5.3', 'num5.10', 'num-3'] | |
94 | >>> realsorted(a) | |
95 | ['num-3', 'num2', 'num5.10', 'num5.3'] | |
79 | 96 | |
80 | 97 | You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types |
81 | 98 | when you sort:: |
142 | 159 | |
143 | 160 | python setup.py build_sphinx |
144 | 161 | |
145 | :mod:`natsort` requires python version 2.6 or greater | |
146 | (this includes python 3.x). To run version 2.6, 3.0, or 3.1 the | |
147 | `argparse <https://pypi.python.org/pypi/argparse>`_ module is required. | |
162 | :mod:`natsort` requires Python version 2.7 or greater or Python 3.2 or greater. | |
148 | 163 | |
149 | 164 | The most efficient sorting can occur if you install the |
150 | 165 | `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ package (it helps |
154 | 169 | check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed. |
155 | 170 | |
156 | 171 | On BSD-based systems (this includes Mac OS X), the underlying ``locale`` library |
157 | can be buggy (please see http://bugs.python.org/issue23195), so ``natsort`` will use | |
158 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ under the hood if it is installed | |
159 | on your computer; this will give more reliable cross-platform results. | |
160 | ``natsort`` will not require (or check) that | |
161 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed at installation | |
162 | since in Linux-based systems and Windows systems ``locale`` should work just fine. | |
163 | Please visit https://github.com/SethMMorton/natsort/issues/21 for more details and | |
164 | how to install on Mac OS X. | |
172 | can be buggy (please see http://bugs.python.org/issue23195); ``locale`` is | |
173 | used for the ``ns.LOCALE`` option and ``humansorted`` function. To remedy this, | |
174 | one can | |
175 | ||
176 | 1. Use "\*.ISO8859-1" locale (e.g. 'en_US.ISO8859-1') rather than "\*.UTF-8" | |
177 | locale. These locales do not suffer from as many problems as "UTF-8" | |
178 | and thus should give expected results. | |
179 | 2. Use `PyICU <https://pypi.python.org/pypi/PyICU>`_. If | |
180 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed, ``natsort`` | |
181 | will use it under the hood; this will give more | |
182 | reliable cross-platform results in the long run. ``natsort`` will not | |
183 | require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
184 | is installed at installation. Please visit | |
185 | https://github.com/SethMMorton/natsort/issues/21 for more details and | |
186 | how to install on Mac OS X. **Please note** that using | |
187 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ is the only way to | |
188 | guarantee correct results for all input on BSD-based systems, since | |
189 | every other suggestion is a workaround. | |
190 | 3. Do nothing. As of ``natsort`` version 4.0.0, ``natsort`` is configured | |
191 | to compensate for a broken ``locale`` library in terms of case-handling; | |
192 | if you do not need to be able to properly handle non-ASCII characters | |
193 | then this may be the best option for you. | |
194 | ||
195 | Note that the above solutions *should not* be required for Windows or | |
196 | Linux since in Linux-based systems and Windows systems ``locale`` *should* work | |
197 | just fine. | |
165 | 198 | |
166 | 199 | :mod:`natsort` comes with a shell script called :mod:`natsort`, or can also be called |
167 | 200 | from the command line with ``python -m natsort``. The command line script is |
0 | .. default-domain:: py | |
1 | .. currentmodule:: natsort | |
2 | ||
3 | :func:`~natsort.natsort_key` | |
4 | ============================ | |
5 | ||
6 | .. autofunction:: natsort_key | |
7 |
47 | 47 | Used to exclude an entry that contains a specific |
48 | 48 | number. |
49 | 49 | -r, --reverse Returns in reversed order. |
50 | -t {digit,int,float,version,ver}, --number-type {digit,int,float,version,ver} | |
50 | -t {digit,int,float,version,ver,real,f,i,r,d}, | |
51 | --number-type {digit,int,float,version,ver,real,f,i,r,d}, | |
52 | --number_type {digit,int,float,version,ver,real,f,i,r,d} | |
51 | 53 | Choose the type of number to search for. "float" will |
52 | 54 | search for floating-point numbers. "int" will only |
53 | 55 | search for integers. "digit", "version", and "ver" are |
54 | shortcuts for "int" with --nosign. | |
56 | synonyms for "int". "real" is a shortcut for "float" | |
57 | with --sign. "i" and "d" are synonyms for "int", "f" | |
58 | is a synonym for "float", and "r" is a synonym for | |
59 | "real". The default is int. | |
55 | 60 | --nosign Do not consider "+" or "-" as part of a number, i.e. |
56 | do not take sign into consideration. | |
61 | do not take sign into consideration. This is the | |
62 | default. | |
63 | -s, --sign Consider "+" or "-" as part of a number, i.e. take | |
64 | sign into consideration. The default is unsigned. | |
57 | 65 | --noexp Do not consider an exponential as part of a number, |
58 | 66 | i.e. 1e4, would be considered as 1, "e", and 4, not as |
59 | 67 | 10000. This only affects the --number-type=float. |
60 | --locale, -l Causes natsort to use locale-aware sorting. On some | |
61 | systems, the underlying C library is broken, so if you | |
62 | get results that you do not expect please install | |
63 | PyICU and try again. | |
68 | -l, --locale Causes natsort to use locale-aware sorting. You will | |
69 | get the best results if you install PyICU. | |
64 | 70 | |
65 | 71 | Description |
66 | 72 | ----------- |
83 | 89 | mode943.54.out |
84 | 90 | mode1000.35.out |
85 | 91 | mode1243.34.out |
86 | $ natsort *.out | xargs your_program | |
92 | $ natsort -t r *.out | xargs your_program | |
87 | 93 | |
88 | You can also place natsort in the middle of a pipe:: | |
94 | ``-t r`` is short for ``--number-type real``. You can also place natsort in | |
95 | the middle of a pipe:: | |
89 | 96 | |
90 | $ find . -name "*.out" | natsort | xargs your_program | |
97 | $ find . -name "*.out" | natsort -t r | xargs your_program | |
91 | 98 | |
92 | To sort version numbers, use the ``--number-type version`` option | |
93 | (or ``-t ver`` for short):: | |
99 | To sort version numbers, use the default ``--number-type``:: | |
94 | 100 | |
95 | 101 | $ ls * |
96 | 102 | prog-1.10.zip prog-1.9.zip prog-2.0.zip |
97 | $ natsort -t ver * | |
103 | $ natsort * | |
98 | 104 | prog-1.9.zip |
99 | 105 | prog-1.10.zip |
100 | 106 | prog-2.0.zip |
105 | 111 | |
106 | 112 | $ ls *.out |
107 | 113 | mode1000.35.out mode1243.34.out mode744.43.out mode943.54.out |
108 | $ natsort *.out -f 900 1100 # Select only numbers between 900-1100 | |
114 | $ natsort -t r *.out -f 900 1100 # Select only numbers between 900-1100 | |
109 | 115 | mode943.54.out |
110 | 116 | mode1000.35.out |
111 | $ natsort *.out -F 900 1100 # Select only numbers NOT between 900-1100 | |
117 | $ natsort -t r *.out -F 900 1100 # Select only numbers NOT between 900-1100 | |
112 | 118 | mode744.43.out |
113 | 119 | mode1243.34.out |
114 | $ natsort *.out -e 1000.35 # Exclude 1000.35 from search | |
120 | $ natsort -t r *.out -e 1000.35 # Exclude 1000.35 from search | |
115 | 121 | mode744.43.out |
116 | 122 | mode943.54.out |
117 | 123 | mode1243.34.out |
0 | 0 | # -*- coding: utf-8 -*- |
1 | from __future__ import (print_function, division, | |
2 | unicode_literals, absolute_import) | |
1 | from __future__ import ( | |
2 | print_function, | |
3 | division, | |
4 | unicode_literals, | |
5 | absolute_import | |
6 | ) | |
3 | 7 | |
4 | 8 | # Local imports. |
5 | from natsort.natsort import (natsort_key, natsort_keygen, ns, | |
6 | natsorted, humansorted, versorted, | |
7 | realsorted, index_realsorted, | |
8 | index_natsorted, index_versorted, | |
9 | index_humansorted, order_by_index, | |
10 | decoder, as_ascii, as_utf8) | |
9 | from natsort.natsort import ( | |
10 | natsort_key, | |
11 | natsort_keygen, | |
12 | natsorted, | |
13 | versorted, | |
14 | humansorted, | |
15 | realsorted, | |
16 | index_natsorted, | |
17 | index_versorted, | |
18 | index_humansorted, | |
19 | index_realsorted, | |
20 | order_by_index, | |
21 | decoder, | |
22 | as_ascii, | |
23 | as_utf8, | |
24 | ns, | |
25 | ) | |
11 | 26 | from natsort._version import __version__ |
12 | 27 | |
13 | 28 | __all__ = [ |
0 | 0 | # -*- coding: utf-8 -*- |
1 | from __future__ import (print_function, division, | |
2 | unicode_literals, absolute_import) | |
1 | from __future__ import ( | |
2 | print_function, | |
3 | division, | |
4 | unicode_literals, | |
5 | absolute_import | |
6 | ) | |
3 | 7 | |
4 | 8 | # Std. lib imports. |
5 | 9 | import sys |
8 | 12 | from natsort.natsort import natsorted, ns |
9 | 13 | from natsort.utils import _regex_and_num_function_chooser |
10 | 14 | from natsort._version import __version__ |
11 | from natsort.py23compat import py23_str | |
15 | from natsort.compat.py23 import py23_str | |
12 | 16 | |
13 | 17 | |
14 | 18 | def main(): |
50 | 54 | help='Returns in reversed order.') |
51 | 55 | parser.add_argument( |
52 | 56 | '-t', '--number-type', '--number_type', dest='number_type', |
53 | choices=('digit', 'int', 'float', 'version', 'ver'), default='float', | |
57 | choices=('digit', 'int', 'float', 'version', 'ver', | |
58 | 'real', 'f', 'i', 'r', 'd'), | |
59 | default='int', | |
54 | 60 | help='Choose the type of number to search for. "float" will search ' |
55 | 61 | 'for floating-point numbers. "int" will only search for ' |
56 | 'integers. "digit", "version", and "ver" are shortcuts for "int" ' | |
57 | 'with --nosign.') | |
58 | parser.add_argument( | |
59 | '--nosign', default=True, action='store_false', dest='signed', | |
62 | 'integers. "digit", "version", and "ver" are synonyms for "int".' | |
63 | '"real" is a shortcut for "float" with --sign. ' | |
64 | '"i" and "d" are synonyms for "int", "f" is a synonym for ' | |
65 | '"float", and "r" is a synonym for "real".' | |
66 | 'The default is %(default)s.') | |
67 | parser.add_argument( | |
68 | '--nosign', default=False, action='store_false', dest='signed', | |
60 | 69 | help='Do not consider "+" or "-" as part of a number, i.e. do not ' |
61 | 'take sign into consideration.') | |
70 | 'take sign into consideration. This is the default.') | |
71 | parser.add_argument( | |
72 | '-s', '--sign', default=False, action='store_true', dest='signed', | |
73 | help='Consider "+" or "-" as part of a number, i.e. ' | |
74 | 'take sign into consideration. The default is unsigned.') | |
62 | 75 | parser.add_argument( |
63 | 76 | '--noexp', default=True, action='store_false', dest='exp', |
64 | 77 | help='Do not consider an exponential as part of a number, i.e. 1e4, ' |
65 | 78 | 'would be considered as 1, "e", and 4, not as 10000. This only ' |
66 | 79 | 'effects the --number-type=float.') |
67 | 80 | parser.add_argument( |
68 | '--locale', '-l', action='store_true', default=False, | |
81 | '-l', '--locale', action='store_true', default=False, | |
69 | 82 | help='Causes natsort to use locale-aware sorting. You will get the ' |
70 | 83 | 'best results if you install PyICU.') |
71 | 84 | parser.add_argument( |
142 | 155 | """Sort the entries, applying the filters first if necessary.""" |
143 | 156 | |
144 | 157 | # Extract the proper number type. |
145 | num_type = {'digit': None, | |
146 | 'version': None, | |
147 | 'ver': None, | |
148 | 'int': int, | |
149 | 'float': float}[args.number_type] | |
150 | unsigned = not args.signed or num_type is None | |
151 | alg = (ns.INT * int(num_type in (int, None)) | | |
152 | ns.UNSIGNED * unsigned | | |
158 | is_float = args.number_type in ('float', 'real', 'f', 'r') | |
159 | signed = args.signed or args.number_type in ('real', 'r') | |
160 | alg = (ns.FLOAT * is_float | | |
161 | ns.SIGNED * signed | | |
153 | 162 | ns.NOEXP * (not args.exp) | |
154 | 163 | ns.PATH * args.paths | |
155 | 164 | ns.LOCALE * args.locale) |
159 | 168 | # as for sorting. |
160 | 169 | do_filter = args.filter is not None or args.reverse_filter is not None |
161 | 170 | if do_filter or args.exclude: |
162 | inp_options = (ns.INT * int(num_type in (int, None)) | | |
163 | ns.UNSIGNED * unsigned | | |
171 | inp_options = (ns.FLOAT * is_float | | |
172 | ns.SIGNED * signed | | |
164 | 173 | ns.NOEXP * (not args.exp), |
165 | 174 | '.' |
166 | 175 | ) |
0 | 0 | # -*- coding: utf-8 -*- |
1 | from __future__ import (print_function, division, | |
2 | unicode_literals, absolute_import) | |
1 | from __future__ import ( | |
2 | print_function, | |
3 | division, | |
4 | unicode_literals, | |
5 | absolute_import | |
6 | ) | |
3 | 7 | |
4 | __version__ = '3.5.6' | |
8 | __version__ = '4.0.3' |
0 | # -*- coding: utf-8 -*- | |
1 | """\ | |
2 | This module is intended to replicate some of the functionality | |
3 | from the fastnumbers module in the event that module is not | |
4 | installed. | |
5 | """ | |
6 | from __future__ import ( | |
7 | print_function, | |
8 | division, | |
9 | unicode_literals, | |
10 | absolute_import | |
11 | ) | |
12 | ||
13 | # Std. lib imports. | |
14 | import sys | |
15 | import re | |
16 | import unicodedata | |
17 | float_re = re.compile(r'[-+]?(\d*\.?\d+(?:[eE][-+]?\d+)?|inf(?:inity)?|nan)$') | |
18 | if sys.version[0] == '2': | |
19 | int_re = re.compile(r'[-+]?\d+[lL]?$') | |
20 | else: | |
21 | int_re = re.compile(r'[-+]?\d+$') | |
22 | long = int | |
23 | unicode = str | |
24 | ||
25 | ||
26 | def fast_float(x, regex_matcher=float_re.match, uni=unicodedata.numeric): | |
27 | """Convert a string to a float quickly""" | |
28 | if type(x) in (int, long, float): | |
29 | return float(x) | |
30 | elif regex_matcher(x): | |
31 | return float(x) | |
32 | elif type(x) == unicode and len(x) == 1 and uni(x, None) is not None: | |
33 | return uni(x) | |
34 | else: | |
35 | return x | |
36 | ||
37 | ||
38 | def fast_int(x, regex_matcher=int_re.match, uni=unicodedata.digit): | |
39 | """\ | |
40 | Convert a string to a int quickly, return input as-is if not possible. | |
41 | """ | |
42 | if type(x) in (int, long, float): | |
43 | return int(x) | |
44 | elif regex_matcher(x): | |
45 | return int(x.rstrip('Ll')) | |
46 | elif type(x) == unicode and len(x) == 1 and uni(x, None) is not None: | |
47 | return uni(x) | |
48 | else: | |
49 | return x | |
50 | ||
51 | ||
52 | def isfloat(x, num_only=False): | |
53 | """Returns true if the input is a float, false otherwise.""" | |
54 | return type(x) == float | |
55 | ||
56 | ||
57 | def isint(x, num_only=False): | |
58 | """Returns true if the input is an int, false otherwise.""" | |
59 | return type(x) in set([int, long]) |
0 | # -*- coding: utf-8 -*- | |
1 | from __future__ import ( | |
2 | print_function, | |
3 | division, | |
4 | unicode_literals, | |
5 | absolute_import | |
6 | ) | |
7 | ||
8 | # If the user has fastnumbers installed, they will get great speed | |
9 | # benefits. If not, we use the simulated functions that come with natsort. | |
10 | try: | |
11 | from fastnumbers import ( | |
12 | fast_float, | |
13 | fast_int, | |
14 | isint, | |
15 | isfloat, | |
16 | ) | |
17 | import fastnumbers | |
18 | v = list(map(int, fastnumbers.__version__.split('.'))) | |
19 | if not (v[0] >= 0 and v[1] >= 5): # Require >= version 0.5.0. | |
20 | raise ImportError | |
21 | except ImportError: | |
22 | from natsort.compat.fake_fastnumbers import ( | |
23 | fast_float, | |
24 | fast_int, | |
25 | isint, | |
26 | isfloat, | |
27 | ) |
0 | # -*- coding: utf-8 -*- | |
1 | from __future__ import ( | |
2 | print_function, | |
3 | division, | |
4 | unicode_literals, | |
5 | absolute_import | |
6 | ) | |
7 | ||
8 | # Std. lib imports | |
9 | import sys | |
10 | ||
11 | # Local imports. | |
12 | from natsort.compat.py23 import PY_VERSION, cmp_to_key | |
13 | ||
14 | # Make the strxfrm function from strcoll on Python2 | |
15 | # It can be buggy (especially on BSD-based systems), | |
16 | # so prefer PyICU if available. | |
17 | try: | |
18 | import PyICU | |
19 | from locale import getlocale | |
20 | ||
21 | # If using PyICU, get the locale from the current global locale, | |
22 | # then create a sort key from that | |
23 | def get_pyicu_transform(l, _d={}): | |
24 | if l not in _d: | |
25 | if l == (None, None): | |
26 | c = PyICU.Collator.createInstance(PyICU.Locale()) | |
27 | else: | |
28 | loc = '.'.join(l) | |
29 | c = PyICU.Collator.createInstance(PyICU.Locale(loc)) | |
30 | _d[l] = c.getSortKey | |
31 | return _d[l] | |
32 | use_pyicu = True | |
33 | null_string = b'' | |
34 | ||
35 | def dumb_sort(): | |
36 | return False | |
37 | except ImportError: | |
38 | if sys.version[0] == '2': | |
39 | from locale import strcoll | |
40 | strxfrm = cmp_to_key(strcoll) | |
41 | null_string = strxfrm('') | |
42 | else: | |
43 | from locale import strxfrm | |
44 | null_string = '' | |
45 | use_pyicu = False | |
46 | ||
47 | # On some systems, locale is broken and does not sort in the expected | |
48 | # order. We will try to detect this and compensate. | |
49 | def dumb_sort(): | |
50 | return strxfrm('A') < strxfrm('a') | |
51 | ||
52 | ||
53 | if PY_VERSION >= 3.3: | |
54 | def _low(x): | |
55 | return x.casefold() | |
56 | else: | |
57 | def _low(x): | |
58 | return x.lower() |
0 | # -*- coding: utf-8 -*- | |
1 | from __future__ import ( | |
2 | print_function, | |
3 | division, | |
4 | unicode_literals, | |
5 | absolute_import | |
6 | ) | |
7 | ||
8 | try: | |
9 | from pathlib import PurePath # PurePath is the base object for Paths. | |
10 | except ImportError: # pragma: no cover | |
11 | PurePath = object # To avoid NameErrors. | |
12 | has_pathlib = False | |
13 | else: | |
14 | has_pathlib = True |
0 | # -*- coding: utf-8 -*- | |
1 | from __future__ import ( | |
2 | print_function, | |
3 | division, | |
4 | unicode_literals, | |
5 | absolute_import | |
6 | ) | |
7 | ||
8 | import functools | |
9 | import sys | |
10 | ||
11 | # These functions are used to make the doctests compatible between | |
12 | # python2 and python3, and also provide uniform functionality between | |
13 | # the two versions. This code is pretty much lifted from the IPython | |
14 | # project's py3compat.py file. Credit to the IPython devs. | |
15 | ||
16 | # Numeric form of version | |
17 | PY_VERSION = float(sys.version[:3]) | |
18 | ||
19 | # Assume all strings are Unicode in Python 2 | |
20 | py23_str = str if sys.version[0] == '3' else unicode | |
21 | ||
22 | # Use the range iterator always | |
23 | py23_range = range if sys.version[0] == '3' else xrange | |
24 | ||
25 | # Uniform base string type | |
26 | py23_basestring = str if sys.version[0] == '3' else basestring | |
27 | ||
28 | # unichr function | |
29 | py23_unichr = chr if sys.version[0] == '3' else unichr | |
30 | ||
31 | # zip as an iterator | |
32 | if sys.version[0] == '3': | |
33 | py23_zip = zip | |
34 | else: | |
35 | import itertools | |
36 | py23_zip = itertools.izip | |
37 | ||
38 | ||
39 | # cmp_to_key was not created till 2.7, so require this for 2.6 | |
40 | try: | |
41 | from functools import cmp_to_key | |
42 | except ImportError: # pragma: no cover | |
43 | def cmp_to_key(mycmp): | |
44 | """Convert a cmp= function into a key= function""" | |
45 | class K(object): | |
46 | __slots__ = ['obj'] | |
47 | ||
48 | def __init__(self, obj): | |
49 | self.obj = obj | |
50 | ||
51 | def __lt__(self, other): | |
52 | return mycmp(self.obj, other.obj) < 0 | |
53 | ||
54 | def __gt__(self, other): | |
55 | return mycmp(self.obj, other.obj) > 0 | |
56 | ||
57 | def __eq__(self, other): | |
58 | return mycmp(self.obj, other.obj) == 0 | |
59 | ||
60 | def __le__(self, other): | |
61 | return mycmp(self.obj, other.obj) <= 0 | |
62 | ||
63 | def __ge__(self, other): | |
64 | return mycmp(self.obj, other.obj) >= 0 | |
65 | ||
66 | def __ne__(self, other): | |
67 | return mycmp(self.obj, other.obj) != 0 | |
68 | ||
69 | def __hash__(self): | |
70 | raise TypeError('hash not implemented') | |
71 | ||
72 | return K | |
73 | ||
74 | ||
75 | # This function is intended to decorate other functions that will modify | |
76 | # either a string directly, or a function's docstring. | |
77 | def _modify_str_or_docstring(str_change_func): | |
78 | @functools.wraps(str_change_func) | |
79 | def wrapper(func_or_str): | |
80 | if isinstance(func_or_str, py23_basestring): | |
81 | func = None | |
82 | doc = func_or_str | |
83 | else: | |
84 | func = func_or_str | |
85 | doc = func.__doc__ | |
86 | ||
87 | doc = str_change_func(doc) | |
88 | ||
89 | if func: | |
90 | func.__doc__ = doc | |
91 | return func | |
92 | return doc | |
93 | return wrapper | |
94 | ||
95 | ||
96 | # Properly modify a docstring to either have the unicode literal or not. | |
97 | if sys.version[0] == '3': | |
98 | # Abstract u'abc' syntax: | |
99 | @_modify_str_or_docstring | |
100 | def u_format(s): | |
101 | """"{u}'abc'" --> "'abc'" (Python 3) | |
102 | ||
103 | Accepts a string or a function, so it can be used as a decorator.""" | |
104 | return s.format(u='') | |
105 | else: | |
106 | # Abstract u'abc' syntax: | |
107 | @_modify_str_or_docstring | |
108 | def u_format(s): | |
109 | """"{u}'abc'" --> "u'abc'" (Python 2) | |
110 | ||
111 | Accepts a string or a function, so it can be used as a decorator.""" | |
112 | return s.format(u='u') |
0 | # -*- coding: utf-8 -*- | |
1 | """\ | |
2 | This module is intended to replicate some of the functionality | |
3 | from the fastnumbers module in the event that module is not | |
4 | installed. | |
5 | """ | |
6 | from __future__ import (print_function, division, | |
7 | unicode_literals, absolute_import) | |
8 | ||
9 | # Std. lib imports. | |
10 | import re | |
11 | ||
12 | float_re = re.compile(r'[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?$') | |
13 | int_re = re.compile(r'[-+]?\d+$') | |
14 | ||
15 | ||
16 | def fast_float(x, regex_matcher=float_re.match): | |
17 | """Convert a string to a float quickly""" | |
18 | return float(x) if regex_matcher(x) else x | |
19 | ||
20 | ||
21 | def fast_int(x, regex_matcher=int_re.match): | |
22 | """\ | |
23 | Convert a string to a int quickly, return input as-is if not possible. | |
24 | """ | |
25 | return int(x) if regex_matcher(x) else x | |
26 | ||
27 | ||
28 | def isreal(x, ntypes=set([int, float])): | |
29 | """Returns true if the input is a real number, false otherwise.""" | |
30 | return type(x) in ntypes |
3 | 3 | together for natsort consumption. It also accounts for Python2 |
4 | 4 | and Python3 differences. |
5 | 5 | """ |
6 | from __future__ import (print_function, division, | |
7 | unicode_literals, absolute_import) | |
6 | from __future__ import ( | |
7 | print_function, | |
8 | division, | |
9 | unicode_literals, | |
10 | absolute_import | |
11 | ) | |
8 | 12 | |
9 | 13 | # Std. lib imports. |
10 | import sys | |
11 | 14 | from itertools import chain |
12 | 15 | from locale import localeconv |
13 | 16 | |
14 | 17 | # Local imports. |
15 | from natsort.py23compat import py23_zip | |
16 | ||
17 | # If the user has fastnumbers installed, they will get great speed | |
18 | # benefits. If not, we simulate the functions here. | |
19 | try: | |
20 | from fastnumbers import isreal | |
21 | except ImportError: | |
22 | from natsort.fake_fastnumbers import isreal | |
23 | ||
24 | # We need cmp_to_key for Python2 because strxfrm is broken for unicode. | |
25 | if sys.version[:3] == '2.7': | |
26 | from functools import cmp_to_key | |
27 | # cmp_to_key was not created till 2.7. | |
28 | elif sys.version[:3] == '2.6': | |
29 | def cmp_to_key(mycmp): # pragma: no cover | |
30 | """Convert a cmp= function into a key= function""" | |
31 | class K(object): | |
32 | __slots__ = ['obj'] | |
33 | ||
34 | def __init__(self, obj): | |
35 | self.obj = obj | |
36 | ||
37 | def __lt__(self, other): | |
38 | return mycmp(self.obj, other.obj) < 0 | |
39 | ||
40 | def __gt__(self, other): | |
41 | return mycmp(self.obj, other.obj) > 0 | |
42 | ||
43 | def __eq__(self, other): | |
44 | return mycmp(self.obj, other.obj) == 0 | |
45 | ||
46 | def __le__(self, other): | |
47 | return mycmp(self.obj, other.obj) <= 0 | |
48 | ||
49 | def __ge__(self, other): | |
50 | return mycmp(self.obj, other.obj) >= 0 | |
51 | ||
52 | def __ne__(self, other): | |
53 | return mycmp(self.obj, other.obj) != 0 | |
54 | ||
55 | def __hash__(self): | |
56 | raise TypeError('hash not implemented') | |
57 | ||
58 | return K | |
59 | ||
60 | # Make the strxfrm function from strcoll on Python2 | |
61 | # It can be buggy (especially on BSD-based systems), | |
62 | # so prefer PyICU if available. | |
63 | try: | |
64 | import PyICU | |
65 | from locale import getlocale | |
66 | ||
67 | # If using PyICU, get the locale from the current global locale, | |
68 | # then create a sort key from that | |
69 | def get_pyicu_transform(l, _d={}): | |
70 | if l not in _d: | |
71 | if l == (None, None): | |
72 | c = PyICU.Collator.createInstance(PyICU.Locale()) | |
73 | else: | |
74 | loc = '.'.join(l) | |
75 | c = PyICU.Collator.createInstance(PyICU.Locale(loc)) | |
76 | _d[l] = c.getSortKey | |
77 | return _d[l] | |
78 | use_pyicu = True | |
79 | null_string = b'' | |
80 | except ImportError: | |
81 | if sys.version[0] == '2': | |
82 | from locale import strcoll | |
83 | strxfrm = cmp_to_key(strcoll) | |
84 | null_string = strxfrm('') | |
85 | else: | |
86 | from locale import strxfrm | |
87 | null_string = '' | |
88 | use_pyicu = False | |
18 | from natsort.compat.locale import use_pyicu, _low | |
19 | if use_pyicu: | |
20 | from natsort.compat.locale import get_pyicu_transform, getlocale | |
21 | else: | |
22 | from natsort.compat.locale import strxfrm | |
89 | 23 | |
90 | 24 | |
91 | 25 | def groupletters(x): |
92 | 26 | """Double all characters, making doubled letters lowercase.""" |
93 | return ''.join(chain(*py23_zip(x.lower(), x))) | |
27 | return ''.join(chain.from_iterable([_low(y), y] for y in x)) | |
94 | 28 | |
95 | 29 | |
96 | 30 | def grouper(val, func): |
101 | 35 | """ |
102 | 36 | # Return the number or transformed string. |
103 | 37 | # If the input is identical to the output, then no conversion happened. |
104 | s = func(val) | |
105 | return groupletters(s) if val is s else s | |
38 | s = func[0](val) | |
39 | return groupletters(s) if not func[1](s) else s | |
106 | 40 | |
107 | 41 | |
108 | 42 | def locale_convert(val, func, group): |
118 | 52 | s = val.replace(radix, '.') if radix != '.' else val |
119 | 53 | |
120 | 54 | # Perform the conversion |
121 | t = func(s) | |
55 | t = func[0](s) | |
122 | 56 | |
123 | 57 | # Return the number or transformed string. |
124 | 58 | # If the input is identical to the output, then no conversion happened. |
128 | 62 | if group: |
129 | 63 | if use_pyicu: |
130 | 64 | xfrm = get_pyicu_transform(getlocale()) |
131 | return xfrm(groupletters(val)) if not isreal(t) else t | |
65 | return xfrm(groupletters(val)) if not func[1](t) else t | |
132 | 66 | else: |
133 | return strxfrm(groupletters(val)) if not isreal(t) else t | |
67 | return strxfrm(groupletters(val)) if not func[1](t) else t | |
134 | 68 | else: |
135 | 69 | if use_pyicu: |
136 | 70 | xfrm = get_pyicu_transform(getlocale()) |
137 | return xfrm(val) if not isreal(t) else t | |
71 | return xfrm(val) if not func[1](t) else t | |
138 | 72 | else: |
139 | return strxfrm(val) if not isreal(t) else t | |
73 | return strxfrm(val) if not func[1](t) else t |
8 | 8 | descend into lists of lists so you can sort by the sublist contents. |
9 | 9 | |
10 | 10 | See the README or the natsort homepage for more details. |
11 | ||
12 | 11 | """ |
13 | ||
14 | from __future__ import (print_function, division, | |
15 | unicode_literals, absolute_import) | |
12 | from __future__ import ( | |
13 | print_function, | |
14 | division, | |
15 | unicode_literals, | |
16 | absolute_import | |
17 | ) | |
16 | 18 | |
17 | 19 | # Std lib. imports. |
18 | 20 | import re |
21 | 23 | from warnings import warn |
22 | 24 | |
23 | 25 | # Local imports. |
24 | from natsort.utils import _natsort_key, _args_to_enum, _do_decoding | |
25 | 26 | from natsort.ns_enum import ns |
26 | from natsort.py23compat import u_format | |
27 | from natsort.compat.py23 import u_format | |
28 | from natsort.utils import ( | |
29 | _natsort_key, | |
30 | _args_to_enum, | |
31 | _do_decoding, | |
32 | ) | |
27 | 33 | |
28 | 34 | # Make sure the doctest works for either python2 or python3 |
29 | 35 | __doc__ = u_format(__doc__) |
121 | 127 | return _do_decoding(s, 'utf-8') |
122 | 128 | |
123 | 129 | |
124 | @u_format | |
125 | def natsort_key(val, key=None, number_type=float, signed=None, exp=None, | |
126 | as_path=None, py3_safe=None, alg=0): | |
127 | """\ | |
128 | Key to sort strings and numbers naturally. | |
129 | ||
130 | Key to sort strings and numbers naturally, not lexicographically. | |
131 | It is designed for use in passing to the 'sorted' builtin or | |
132 | 'sort' attribute of lists. | |
133 | ||
134 | .. note:: Deprecated since version 3.4.0. | |
135 | This function remains in the publicly exposed API for | |
136 | backwards-compatibility reasons, but future development | |
137 | should use the newer `natsort_keygen` function. It is | |
138 | planned to remove this from the public API in natsort | |
139 | version 4.0.0. A DeprecationWarning will be raised | |
140 | via the warnings module; set warnings.simplefilter("always") | |
141 | to raise them to see if your code will work in version | |
142 | 4.0.0. | |
143 | ||
144 | Parameters | |
145 | ---------- | |
146 | val : {{str, unicode}} | |
147 | The value used by the sorting algorithm | |
148 | ||
130 | def natsort_key(val, key=None, alg=0, **_kwargs): | |
131 | """Undocumented, kept for backwards-compatibility.""" | |
132 | msg = "natsort_key is deprecated as of 3.4.0, please use natsort_keygen" | |
133 | warn(msg, DeprecationWarning) | |
134 | return _natsort_key(val, key, _args_to_enum(**_kwargs) | alg) | |
135 | ||
136 | ||
137 | @u_format | |
138 | def natsort_keygen(key=None, alg=0, **_kwargs): | |
139 | """\ | |
140 | Generate a key to sort strings and numbers naturally. | |
141 | ||
142 | Generate a key to sort strings and numbers naturally, | |
143 | not lexicographically. This key is designed for use as the | |
144 | `key` argument to functions such as the `sorted` builtin. | |
145 | ||
146 | The user may customize the generated function with the | |
147 | arguments to `natsort_keygen`, including an optional | |
148 | `key` function which will be called before the `natsort_key`. | |
149 | ||
150 | Parameters | |
151 | ---------- | |
149 | 152 | key : callable, optional |
150 | 153 | A key used to manipulate the input value before parsing for |
151 | 154 | numbers. It is **not** applied recursively. |
152 | 155 | It should accept a single argument and return a single value. |
153 | 156 | |
154 | number_type : {{None, float, int}}, optional | |
155 | Deprecated as of version 3.5.0 and will become an undocumented | |
156 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
157 | for all future development. See :class:`ns` class documentation for | |
158 | details. | |
159 | ||
160 | signed : {{True, False}}, optional | |
161 | Deprecated as of version 3.5.0 and will become an undocumented | |
162 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
163 | for all future development. See :class:`ns` class documentation for | |
164 | details. | |
165 | ||
166 | exp : {{True, False}}, optional | |
167 | Deprecated as of version 3.5.0 and will become an undocumented | |
168 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
169 | for all future development. See :class:`ns` class documentation for | |
170 | details. | |
171 | ||
172 | as_path : {{True, False}}, optional | |
173 | Deprecated as of version 3.5.0 and will become an undocumented | |
174 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
175 | for all future development. See :class:`ns` class documentation for | |
176 | details. | |
177 | ||
178 | py3_safe : {{True, False}}, optional | |
179 | Deprecated as of version 3.5.0 and will become an undocumented | |
180 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
181 | for all future development. See :class:`ns` class documentation for | |
182 | details. | |
183 | ||
184 | 157 | alg : ns enum, optional |
185 | 158 | This option is used to control which algorithm `natsort` |
186 | 159 | uses when sorting. For details into these options, please see |
187 | the :class:`ns` class documentation. The default is `ns.FLOAT`. | |
188 | ||
189 | Returns | |
190 | ------- | |
191 | out : tuple | |
192 | The modified value with numbers extracted. | |
193 | ||
194 | See Also | |
195 | -------- | |
196 | natsort_keygen : Generates a properly wrapped `natsort_key`. | |
197 | ||
198 | Examples | |
199 | -------- | |
200 | Using natsort_key is just like any other sorting key in python:: | |
201 | ||
202 | >>> a = ['num3', 'num5', 'num2'] | |
203 | >>> a.sort(key=natsort_key) | |
204 | >>> a | |
205 | [{u}'num2', {u}'num3', {u}'num5'] | |
206 | ||
207 | It works by separating out the numbers from the strings:: | |
208 | ||
209 | >>> natsort_key('num2') | |
210 | ({u}'num', 2.0) | |
211 | ||
212 | If you need to call natsort_key with the number_type argument, or get a | |
213 | special attribute or item of each element of the sequence, please use | |
214 | the `natsort_keygen` function. Actually, please just use the | |
215 | `natsort_keygen` function. | |
216 | ||
217 | Notes | |
218 | ----- | |
219 | Iterables are parsed recursively so you can sort lists of lists:: | |
220 | ||
221 | >>> natsort_key(('a1', 'a10')) | |
222 | (({u}'a', 1.0), ({u}'a', 10.0)) | |
223 | ||
224 | Strings that lead with a number get an empty string at the front of the | |
225 | tuple. This is designed to get around the "unorderable types" issue of | |
226 | Python3:: | |
227 | ||
228 | >>> natsort_key('15a') | |
229 | ({u}'', 15.0, {u}'a') | |
230 | ||
231 | You can give bare numbers, too:: | |
232 | ||
233 | >>> natsort_key(10) | |
234 | ({u}'', 10) | |
235 | ||
236 | If you have a case where one of your string has two numbers in a row, | |
237 | you can turn on the "py3_safe" option to try to add a "" between sets | |
238 | of two numbers:: | |
239 | ||
240 | >>> natsort_key('43h7+3', py3_safe=True) | |
241 | ({u}'', 43.0, {u}'h', 7.0, {u}'', 3.0) | |
242 | ||
243 | """ | |
244 | msg = "natsort_key is deprecated as of 3.4.0, please use natsort_keygen" | |
245 | warn(msg, DeprecationWarning) | |
246 | alg = _args_to_enum(number_type, signed, exp, as_path, py3_safe) | alg | |
247 | return _natsort_key(val, key, alg) | |
248 | ||
249 | ||
250 | @u_format | |
251 | def natsort_keygen(key=None, number_type=float, signed=None, exp=None, | |
252 | as_path=None, py3_safe=None, alg=0): | |
253 | """\ | |
254 | Generate a key to sort strings and numbers naturally. | |
255 | ||
256 | Generate a key to sort strings and numbers naturally, | |
257 | not lexicographically. This key is designed for use as the | |
258 | `key` argument to functions such as the `sorted` builtin. | |
259 | ||
260 | The user may customize the generated function with the | |
261 | arguments to `natsort_keygen`, including an optional | |
262 | `key` function which will be called before the `natsort_key`. | |
263 | ||
264 | Parameters | |
265 | ---------- | |
266 | key : callable, optional | |
267 | A key used to manipulate the input value before parsing for | |
268 | numbers. It is **not** applied recursively. | |
269 | It should accept a single argument and return a single value. | |
270 | ||
271 | number_type : {{None, float, int}}, optional | |
272 | Deprecated as of version 3.5.0 and will become an undocumented | |
273 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
274 | for all future development. See :class:`ns` class documentation for | |
275 | details. | |
276 | ||
277 | signed : {{True, False}}, optional | |
278 | Deprecated as of version 3.5.0 and will become an undocumented | |
279 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
280 | for all future development. See :class:`ns` class documentation for | |
281 | details. | |
282 | ||
283 | exp : {{True, False}}, optional | |
284 | Deprecated as of version 3.5.0 and will become an undocumented | |
285 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
286 | for all future development. See :class:`ns` class documentation for | |
287 | details. | |
288 | ||
289 | as_path : {{True, False}}, optional | |
290 | Deprecated as of version 3.5.0 and will become an undocumented | |
291 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
292 | for all future development. See :class:`ns` class documentation for | |
293 | details. | |
294 | ||
295 | py3_safe : {{True, False}}, optional | |
296 | Deprecated as of version 3.5.0 and will become an undocumented | |
297 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
298 | for all future development. See :class:`ns` class documentation for | |
299 | details. | |
300 | ||
301 | alg : ns enum, optional | |
302 | This option is used to control which algorithm `natsort` | |
303 | uses when sorting. For details into these options, please see | |
304 | the :class:`ns` class documentation. The default is `ns.FLOAT`. | |
160 | the :class:`ns` class documentation. The default is `ns.INT`. | |
305 | 161 | |
306 | 162 | Returns |
307 | 163 | ------- |
310 | 166 | suitable for passing as the `key` argument to functions |
311 | 167 | such as `sorted`. |
312 | 168 | |
169 | See Also | |
170 | -------- | |
171 | natsorted | |
172 | ||
313 | 173 | Examples |
314 | 174 | -------- |
315 | 175 | `natsort_keygen` is a convenient way to create a custom key |
317 | 177 | will return a plain `natsort_key` instance:: |
318 | 178 | |
319 | 179 | >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] |
320 | >>> b = a[:] | |
321 | >>> a.sort(key=natsort_key) | |
322 | >>> b.sort(key=natsort_keygen()) | |
323 | >>> a == b | |
324 | True | |
325 | ||
326 | The power of `natsort_keygen` is when you want to want to pass | |
327 | arguments to the `natsort_key`. Consider the following | |
328 | equivalent examples; which is more clear? :: | |
329 | ||
330 | >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] | |
331 | >>> b = a[:] | |
332 | >>> a.sort(key=lambda x: natsort_key(x, key=lambda y: y.upper(), | |
333 | ... signed=False)) | |
334 | >>> b.sort(key=natsort_keygen(key=lambda x: x.upper(), signed=False)) | |
335 | >>> a == b | |
336 | True | |
337 | ||
338 | """ | |
339 | alg = _args_to_enum(number_type, signed, exp, as_path, py3_safe) | alg | |
340 | return partial(_natsort_key, key=key, alg=alg) | |
341 | ||
342 | ||
343 | @u_format | |
344 | def natsorted(seq, key=None, number_type=float, signed=None, exp=None, | |
345 | reverse=False, as_path=None, alg=0): | |
180 | >>> a.sort(key=natsort_keygen(alg=ns.REAL)) | |
181 | >>> a | |
182 | [{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3'] | |
183 | ||
184 | """ | |
185 | return partial(_natsort_key, key=key, alg=_args_to_enum(**_kwargs) | alg) | |
186 | ||
187 | ||
188 | @u_format | |
189 | def natsorted(seq, key=None, reverse=False, alg=0, **_kwargs): | |
346 | 190 | """\ |
347 | 191 | Sorts a sequence naturally. |
348 | 192 | |
360 | 204 | It is **not** applied recursively. |
361 | 205 | It should accept a single argument and return a single value. |
362 | 206 | |
363 | number_type : {{None, float, int}}, optional | |
364 | Deprecated as of version 3.5.0 and will become an undocumented | |
365 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
366 | for all future development. See :class:`ns` class documentation for | |
367 | details. | |
368 | ||
369 | signed : {{True, False}}, optional | |
370 | Deprecated as of version 3.5.0 and will become an undocumented | |
371 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
372 | for all future development. See :class:`ns` class documentation for | |
373 | details. | |
374 | ||
375 | exp : {{True, False}}, optional | |
376 | Deprecated as of version 3.5.0 and will become an undocumented | |
377 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
378 | for all future development. See :class:`ns` class documentation for | |
379 | details. | |
380 | ||
381 | 207 | reverse : {{True, False}}, optional |
382 | 208 | Return the list in reversed sorted order. The default is |
383 | 209 | `False`. |
384 | 210 | |
385 | as_path : {{True, False}}, optional | |
386 | Deprecated as of version 3.5.0 and will become an undocumented | |
387 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
388 | for all future development. See :class:`ns` class documentation for | |
389 | details. | |
390 | ||
391 | 211 | alg : ns enum, optional |
392 | 212 | This option is used to control which algorithm `natsort` |
393 | 213 | uses when sorting. For details into these options, please see |
394 | the :class:`ns` class documentation. The default is `ns.FLOAT`. | |
214 | the :class:`ns` class documentation. The default is `ns.INT`. | |
395 | 215 | |
396 | 216 | Returns |
397 | 217 | ------- |
401 | 221 | See Also |
402 | 222 | -------- |
403 | 223 | natsort_keygen : Generates the key that makes natural sorting possible. |
404 | versorted : A wrapper for ``natsorted(seq, alg=ns.VERSION)``. | |
405 | realsorted : Identical to ``natsorted(seq)``; for forwards-compatibility. | |
224 | realsorted : A wrapper for ``natsorted(seq, alg=ns.REAL)``. | |
406 | 225 | humansorted : A wrapper for ``natsorted(seq, alg=ns.LOCALE)``. |
407 | 226 | index_natsorted : Returns the sorted indexes from `natsorted`. |
408 | 227 | |
415 | 234 | [{u}'num2', {u}'num3', {u}'num5'] |
416 | 235 | |
417 | 236 | """ |
418 | alg = _args_to_enum(number_type, signed, exp, as_path, None) | alg | |
237 | alg = _args_to_enum(**_kwargs) | alg | |
419 | 238 | try: |
420 | return sorted(seq, reverse=reverse, | |
421 | key=natsort_keygen(key, alg=alg)) | |
239 | return sorted(seq, reverse=reverse, key=natsort_keygen(key, alg=alg)) | |
422 | 240 | except TypeError as e: # pragma: no cover |
423 | 241 | # In the event of an unresolved "unorderable types" error |
424 | 242 | # for string to number type comparisons (not str/bytes), |
434 | 252 | |
435 | 253 | |
436 | 254 | @u_format |
437 | def versorted(seq, key=None, reverse=False, as_path=None, alg=0): | |
438 | """\ | |
439 | Convenience function to sort version numbers. | |
440 | ||
441 | Convenience function to sort version numbers. This is a wrapper | |
442 | around ``natsorted(seq, alg=ns.VERSION)``. | |
255 | def versorted(seq, key=None, reverse=False, alg=0, **_kwargs): | |
256 | """\ | |
257 | Identical to :func:`natsorted`. | |
258 | ||
259 | This function exists for backwards compatibility with `natsort` | |
260 | version < 4.0.0. Future development should use :func:`natsorted`. | |
261 | ||
262 | Please see the :func:`natsorted` documentation for use. | |
263 | ||
264 | See Also | |
265 | -------- | |
266 | natsorted | |
267 | ||
268 | """ | |
269 | return natsorted(seq, key, reverse, alg, **_kwargs) | |
270 | ||
271 | ||
272 | @u_format | |
273 | def humansorted(seq, key=None, reverse=False, alg=0): | |
274 | """\ | |
275 | Convenience function to properly sort non-numeric characters. | |
276 | ||
277 | Convenience function to properly sort non-numeric characters | |
278 | in a locale-aware fashion (a.k.a "human sorting"). This is a | |
279 | wrapper around ``natsorted(seq, alg=ns.LOCALE)``. | |
280 | ||
281 | .. warning:: On BSD-based systems (like Mac OS X), the underlying | |
282 | C library that Python's locale module uses is broken. | |
283 | On these systems it is recommended that you install | |
284 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
285 | if you wish to use ``humansorted``, especially if you need | |
286 | to handle non-ASCII characters. If you are on | |
287 | one of these systems and get unexpected results, please try | |
288 | using `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
289 | before filing a bug report to `natsort`. | |
443 | 290 | |
444 | 291 | Parameters |
445 | 292 | ---------- |
455 | 302 | Return the list in reversed sorted order. The default is |
456 | 303 | `False`. |
457 | 304 | |
458 | as_path : {{True, False}}, optional | |
459 | Deprecated as of version 3.5.0 and will become an undocumented | |
460 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
461 | for all future development. See :class:`ns` class documentation for | |
462 | details. | |
463 | ||
464 | alg : ns enum, optional | |
465 | This option is used to control which algorithm `natsort` | |
466 | uses when sorting. For details into these options, please see | |
467 | the :class:`ns` class documentation. The default is `ns.VERSION`. | |
468 | ||
469 | Returns | |
470 | ------- | |
471 | out : list | |
472 | The sorted sequence. | |
473 | ||
474 | See Also | |
475 | -------- | |
476 | index_versorted : Returns the sorted indexes from `versorted`. | |
477 | ||
478 | Examples | |
479 | -------- | |
480 | Use `versorted` just like the builtin `sorted`:: | |
481 | ||
482 | >>> a = ['num4.0.2', 'num3.4.1', 'num3.4.2'] | |
483 | >>> versorted(a) | |
484 | [{u}'num3.4.1', {u}'num3.4.2', {u}'num4.0.2'] | |
485 | ||
486 | """ | |
487 | alg = _args_to_enum(float, None, None, as_path, None) | alg | |
488 | return natsorted(seq, key, reverse=reverse, alg=alg | ns.VERSION) | |
489 | ||
490 | ||
491 | @u_format | |
492 | def humansorted(seq, key=None, reverse=False, alg=0): | |
493 | """\ | |
494 | Convenience function to properly sort non-numeric characters. | |
495 | ||
496 | Convenience function to properly sort non-numeric characters | |
497 | in a locale-aware fashion (a.k.a "human sorting"). This is a | |
498 | wrapper around ``natsorted(seq, alg=ns.LOCALE)``. | |
499 | ||
500 | .. warning:: On BSD-based systems (like Mac OS X), the underlying | |
501 | C library that Python's locale module uses is broken. | |
502 | On these systems it is recommended that you install | |
503 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
504 | if you wish to use ``humansorted``. If you are on | |
505 | one of systems and get unexpected results, please try | |
506 | using `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
507 | before filing a bug report to `natsort`. | |
508 | ||
509 | Parameters | |
510 | ---------- | |
511 | seq : iterable | |
512 | The sequence to sort. | |
513 | ||
514 | key : callable, optional | |
515 | A key used to determine how to sort each element of the sequence. | |
516 | It is **not** applied recursively. | |
517 | It should accept a single argument and return a single value. | |
518 | ||
519 | reverse : {{True, False}}, optional | |
520 | Return the list in reversed sorted order. The default is | |
521 | `False`. | |
522 | ||
523 | 305 | alg : ns enum, optional |
524 | 306 | This option is used to control which algorithm `natsort` |
525 | 307 | uses when sorting. For details into these options, please see |
537 | 319 | Notes |
538 | 320 | ----- |
539 | 321 | You may find that if you do not explicitly set |
540 | the locale your results may not be as you expect... I have found that | |
541 | it depends on the system you are on. To do this is straightforward | |
542 | (in the below example I use 'en_US.UTF-8', but you should use your | |
543 | locale):: | |
322 | the locale your results may not be as you expect, although | |
323 | as of ``natsort`` version 4.0.0 the sorting algorithm has been | |
324 | updated to account for a buggy ``locale`` installation. | |
325 | In the below example 'en_US.UTF-8' is used, but you should use your | |
326 | locale:: | |
544 | 327 | |
545 | 328 | >>> import locale |
546 | 329 | >>> # The 'str' call is only to get around a bug on Python 2.x |
551 | 334 | |
552 | 335 | It is preferred that you do this before importing `natsort`. |
553 | 336 | If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning |
554 | above) then you should not need to do this. | |
337 | above) then you should not need to explicitly set a locale. | |
555 | 338 | |
556 | 339 | Examples |
557 | 340 | -------- |
564 | 347 | [{u}'apple', {u}'Apple', {u}'banana', {u}'Banana'] |
565 | 348 | |
566 | 349 | """ |
567 | return natsorted(seq, key, reverse=reverse, alg=alg | ns.LOCALE) | |
350 | return natsorted(seq, key, reverse, alg | ns.LOCALE) | |
568 | 351 | |
569 | 352 | |
570 | 353 | @u_format |
571 | 354 | def realsorted(seq, key=None, reverse=False, alg=0): |
572 | 355 | """\ |
573 | Identical to :func:`natsorted`. | |
574 | ||
575 | This is provided for forward-compatibility with :mod:`natsort` | |
576 | version >= 4.0.0. If you are relying on the default sorting | |
577 | behavior of :func:`natsorted` to sort by signed floats, | |
578 | you should consider using this function as the default sorting | |
579 | behavior of :func:`natsorted` will changed to unsigned | |
580 | integers in :mod:`natsort` version >= 4.0.0. | |
356 | Convenience function to properly sort signed floats. | |
357 | ||
358 | Convenience function to properly sort signed floats within | |
359 | strings (i.e. "a-5.7"). This is a wrapper around | |
360 | ``natsorted(seq, alg=ns.REAL)``. | |
361 | ||
362 | The behavior of :func:`realsorted` for `natsort` version >= 4.0.0 | |
363 | was the default behavior of :func:`natsorted` for `natsort` | |
364 | version < 4.0.0. | |
581 | 365 | |
582 | 366 | Parameters |
583 | 367 | ---------- |
596 | 380 | alg : ns enum, optional |
597 | 381 | This option is used to control which algorithm `natsort` |
598 | 382 | uses when sorting. For details into these options, please see |
599 | the :class:`ns` class documentation. The default is `ns.FLOAT`. | |
383 | the :class:`ns` class documentation. The default is `ns.REAL`. | |
600 | 384 | |
601 | 385 | Returns |
602 | 386 | ------- |
612 | 396 | Use `realsorted` just like the builtin `sorted`:: |
613 | 397 | |
614 | 398 | >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] |
399 | >>> natsorted(a) | |
400 | [{u}'num2', {u}'num5.3', {u}'num5.10', {u}'num-3'] | |
615 | 401 | >>> realsorted(a) |
616 | 402 | [{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3'] |
617 | 403 | |
618 | 404 | """ |
619 | return natsorted(seq, key=key, reverse=reverse, alg=alg) | |
620 | ||
621 | ||
622 | @u_format | |
623 | def index_natsorted(seq, key=None, number_type=float, signed=None, exp=None, | |
624 | reverse=False, as_path=None, alg=0): | |
405 | return natsorted(seq, key, reverse, alg | ns.REAL) | |
406 | ||
407 | ||
408 | @u_format | |
409 | def index_natsorted(seq, key=None, reverse=False, alg=0, **_kwargs): | |
625 | 410 | """\ |
626 | 411 | Return the list of the indexes used to sort the input sequence. |
627 | 412 | |
640 | 425 | It is **not** applied recursively. |
641 | 426 | It should accept a single argument and return a single value. |
642 | 427 | |
643 | number_type : {{None, float, int}}, optional | |
644 | Deprecated as of version 3.5.0 and will become an undocumented | |
645 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
646 | for all future development. See :class:`ns` class documentation for | |
647 | details. | |
648 | ||
649 | signed : {{True, False}}, optional | |
650 | Deprecated as of version 3.5.0 and will become an undocumented | |
651 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
652 | for all future development. See :class:`ns` class documentation for | |
653 | details. | |
654 | ||
655 | exp : {{True, False}}, optional | |
656 | Deprecated as of version 3.5.0 and will become an undocumented | |
657 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
658 | for all future development. See :class:`ns` class documentation for | |
659 | details. | |
660 | ||
661 | 428 | reverse : {{True, False}}, optional |
662 | 429 | Return the list in reversed sorted order. The default is |
663 | 430 | `False`. |
664 | 431 | |
665 | as_path : {{True, False}}, optional | |
666 | Deprecated as of version 3.5.0 and will become an undocumented | |
667 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
668 | for all future development. See :class:`ns` class documentation for | |
669 | details. | |
670 | ||
671 | 432 | alg : ns enum, optional |
672 | 433 | This option is used to control which algorithm `natsort` |
673 | 434 | uses when sorting. For details into these options, please see |
674 | the :class:`ns` class documentation. The default is `ns.FLOAT`. | |
435 | the :class:`ns` class documentation. The default is `ns.INT`. | |
675 | 436 | |
676 | 437 | Returns |
677 | 438 | ------- |
701 | 462 | [{u}'baz', {u}'foo', {u}'bar'] |
702 | 463 | |
703 | 464 | """ |
704 | alg = _args_to_enum(number_type, signed, exp, as_path, None) | alg | |
465 | alg = _args_to_enum(**_kwargs) | alg | |
705 | 466 | if key is None: |
706 | 467 | newkey = itemgetter(1) |
707 | 468 | else: |
726 | 487 | |
727 | 488 | |
728 | 489 | @u_format |
729 | def index_versorted(seq, key=None, reverse=False, as_path=None, alg=0): | |
490 | def index_versorted(seq, key=None, reverse=False, alg=0, **_kwargs): | |
491 | """\ | |
492 | Identical to :func:`index_natsorted`. | |
493 | ||
494 | This function exists for backwards compatibility with | |
495 | `natsort` version < 4.0.0. Future development should use | |
496 | :func:`index_natsorted`. | |
497 | ||
498 | Please see the :func:`index_natsorted` documentation for use. | |
499 | ||
500 | See Also | |
501 | -------- | |
502 | index_natsorted | |
503 | ||
504 | """ | |
505 | return index_natsorted(seq, key, reverse, alg, **_kwargs) | |
506 | ||
507 | ||
508 | @u_format | |
509 | def index_humansorted(seq, key=None, reverse=False, alg=0): | |
730 | 510 | """\ |
731 | 511 | Return the list of the indexes used to sort the input sequence |
732 | of version numbers. | |
733 | ||
734 | Sorts a sequence of version, but returns a list of sorted the | |
735 | indexes and not the sorted list. This list of indexes can be | |
736 | used to sort multiple lists by the sorted order of the given | |
737 | sequence. | |
738 | ||
739 | This is a wrapper around ``index_natsorted(seq, number_type=None)``. | |
512 | in a locale-aware manner. | |
513 | ||
514 | Sorts a sequence in a locale-aware manner, but returns a list | |
515 | of the sorted indexes and not the sorted list. This list of | |
516 | indexes can be used to sort multiple lists by the sorted order | |
517 | of the given sequence. | |
518 | ||
519 | This is a wrapper around ``index_natsorted(seq, alg=ns.LOCALE)``. | |
520 | Please see the ``humansorted`` documentation for caveats of | |
521 | using ``index_humansorted``. | |
740 | 522 | |
741 | 523 | Parameters |
742 | 524 | ---------- |
752 | 534 | Return the list in reversed sorted order. The default is |
753 | 535 | `False`. |
754 | 536 | |
755 | as_path : {{True, False}}, optional | |
756 | Deprecated as of version 3.5.0 and will become an undocumented | |
757 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
758 | for all future development. See :class:`ns` class documentation for | |
759 | details. | |
760 | ||
761 | alg : ns enum, optional | |
762 | This option is used to control which algorithm `natsort` | |
763 | uses when sorting. For details into these options, please see | |
764 | the :class:`ns` class documentation. The default is `ns.VERSION`. | |
765 | ||
766 | Returns | |
767 | ------- | |
768 | out : tuple | |
769 | The ordered indexes of the sequence. | |
770 | ||
771 | See Also | |
772 | -------- | |
773 | versorted | |
774 | order_by_index | |
775 | ||
776 | Examples | |
777 | -------- | |
778 | Use `index_versorted` just like the builtin `sorted`:: | |
779 | ||
780 | >>> a = ['num4.0.2', 'num3.4.1', 'num3.4.2'] | |
781 | >>> index_versorted(a) | |
782 | [1, 2, 0] | |
783 | ||
784 | """ | |
785 | alg = _args_to_enum(float, None, None, as_path, None) | alg | |
786 | return index_natsorted(seq, key, reverse=reverse, alg=alg | ns.VERSION) | |
787 | ||
788 | ||
789 | @u_format | |
790 | def index_humansorted(seq, key=None, reverse=False, alg=0): | |
791 | """\ | |
792 | Return the list of the indexes used to sort the input sequence | |
793 | in a locale-aware manner. | |
794 | ||
795 | Sorts a sequence in a locale-aware manner, but returns a list | |
796 | of sorted the indexes and not the sorted list. This list of | |
797 | indexes can be used to sort multiple lists by the sorted order | |
798 | of the given sequence. | |
799 | ||
800 | This is a wrapper around ``index_natsorted(seq, alg=ns.LOCALE)``. | |
801 | ||
802 | Parameters | |
803 | ---------- | |
804 | seq: iterable | |
805 | The sequence to sort. | |
806 | ||
807 | key: callable, optional | |
808 | A key used to determine how to sort each element of the sequence. | |
809 | It is **not** applied recursively. | |
810 | It should accept a single argument and return a single value. | |
811 | ||
812 | reverse : {{True, False}}, optional | |
813 | Return the list in reversed sorted order. The default is | |
814 | `False`. | |
815 | ||
816 | 537 | alg : ns enum, optional |
817 | 538 | This option is used to control which algorithm `natsort` |
818 | 539 | uses when sorting. For details into these options, please see |
831 | 552 | Notes |
832 | 553 | ----- |
833 | 554 | You may find that if you do not explicitly set |
834 | the locale your results may not be as you expect... I have found that | |
835 | it depends on the system you are on. To do this is straightforward | |
836 | (in the below example I use 'en_US.UTF-8', but you should use your | |
837 | locale):: | |
555 | the locale your results may not be as you expect, although | |
556 | as of ``natsort`` version 4.0.0 the sorting algorithm has been | |
557 | updated to account for a buggy ``locale`` installation. | |
558 | In the below example 'en_US.UTF-8' is used, but you should use your | |
559 | locale:: | |
838 | 560 | |
839 | 561 | >>> import locale |
840 | 562 | >>> # The 'str' call is only to get around a bug on Python 2.x |
845 | 567 | |
846 | 568 | It is preferred that you do this before importing `natsort`. |
847 | 569 | If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning |
848 | above) then you should not need to do this. | |
570 | above) then you should not need to explicitly set a locale. | |
849 | 571 | |
850 | 572 | Examples |
851 | 573 | -------- |
856 | 578 | [2, 0, 3, 1] |
857 | 579 | |
858 | 580 | """ |
859 | return index_natsorted(seq, key, reverse=reverse, alg=alg | ns.LOCALE) | |
581 | return index_natsorted(seq, key, reverse, alg | ns.LOCALE) | |
860 | 582 | |
861 | 583 | |
862 | 584 | @u_format |
863 | 585 | def index_realsorted(seq, key=None, reverse=False, alg=0): |
864 | 586 | """\ |
865 | Identical to :func:`index_natsorted`. | |
866 | ||
867 | This is provided for forward-compatibility with :mod:`natsort` | |
868 | version >= 4.0.0. If you are relying on the default sorting | |
869 | behavior of :func:`index_natsorted` to sort by signed floats, | |
870 | you should consider using this function as the default sorting | |
871 | behavior of :func:`index_natsorted` will changed to unsigned | |
872 | integers in :mod:`natsort` version >= 4.0.0. | |
587 | Return the list of the indexes used to sort the input sequence | |
588 | containing signed floats. | |
589 | ||
590 | Sorts a sequence containing signed floats, but returns a list | |
591 | of the sorted indexes and not the sorted list. This list of | |
592 | indexes can be used to sort multiple lists by the sorted order | |
593 | of the given sequence. | |
594 | ||
595 | This is a wrapper around ``index_natsorted(seq, alg=ns.REAL)``. | |
596 | ||
597 | The behavior of :func:`index_realsorted` in `natsort` version >= 4.0.0 | |
598 | was the default behavior of :func:`index_natsorted` for `natsort` | |
599 | version < 4.0.0. | |
873 | 600 | |
874 | 601 | Parameters |
875 | 602 | ---------- |
888 | 615 | alg : ns enum, optional |
889 | 616 | This option is used to control which algorithm `natsort` |
890 | 617 | uses when sorting. For details into these options, please see |
891 | the :class:`ns` class documentation. | |
618 | the :class:`ns` class documentation. The default is `ns.REAL`. | |
892 | 619 | |
893 | 620 | Returns |
894 | 621 | ------- |
909 | 636 | [1, 3, 0, 2] |
910 | 637 | |
911 | 638 | """ |
912 | return index_natsorted(seq, key=key, reverse=reverse, alg=alg) | |
639 | return index_natsorted(seq, key, reverse, alg | ns.REAL) | |
913 | 640 | |
914 | 641 | |
915 | 642 | @u_format |
0 | 0 | # -*- coding: utf-8 -*- |
1 | 1 | """This module defines the "ns" enum for natsort.""" |
2 | ||
3 | from __future__ import (print_function, division, | |
4 | unicode_literals, absolute_import) | |
2 | from __future__ import ( | |
3 | print_function, | |
4 | division, | |
5 | unicode_literals, | |
6 | absolute_import | |
7 | ) | |
5 | 8 | |
6 | 9 | |
7 | 10 | class ns(object): |
19 | 22 | C library that Python's locale module uses is broken. |
20 | 23 | On these systems it is recommended that you install |
21 | 24 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ |
22 | if you wish to use ``LOCALE``. If you are on one of | |
25 | if you wish to use ``LOCALE``, especially if you need | |
26 | to handle non-ASCII characters. If you are on one of these | |
23 | 27 | systems and get unexpected results, please try using |
24 | 28 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ before |
25 | 29 | filing a bug report to ``natsort``. |
26 | 30 | |
27 | 31 | Attributes |
28 | 32 | ---------- |
33 | INT, I (default) | |
34 | The default - parse numbers as integers. | |
29 | 35 | FLOAT, F |
30 | The default - parse numbers as floats. | |
31 | INT, I | |
32 | Tell `natsort` to parse numbers as ints. | |
33 | UNSIGNED, U | |
34 | Tell `natsort` to ignore any sign (i.e. "-" or "+") to the | |
35 | immediate left of a number. It is the same as setting the old | |
36 | `signed` option to `False`. | |
36 | Tell `natsort` to parse numbers as floats. | |
37 | UNSIGNED, U (default) | |
38 | Tell `natsort` to ignore any sign (i.e. "-" or "+") to the immediate | |
39 | left of a number. It is the same as setting the old `signed` option | |
40 | to `False`. This is the default. | |
41 | SIGNED, S | |
42 | Tell `natsort` to take into account any sign (i.e. "-" or "+") | |
43 | to the immediate left of a number. It is the same as setting | |
44 | the old `signed` option to `True`. | |
37 | 45 | VERSION, V |
38 | 46 | This is a shortcut for ``ns.INT | ns.UNSIGNED``, which is useful |
39 | 47 | when attempting to sort version numbers. It is the same as |
40 | setting the old `number_type` option to `None`. | |
48 | setting the old `number_type` option to `None`. Since | |
49 | ``ns.INT | ns.UNSIGNED`` is default, this is | |
50 | unnecessary. | |
41 | 51 | DIGIT, D |
42 | 52 | Same as `VERSION` above. |
53 | REAL, R | |
54 | This is a shortcut for ``ns.FLOAT | ns.SIGNED``, which is useful | |
55 | when attempting to sort real numbers. | |
43 | 56 | NOEXP, N |
44 | 57 | Tell `natsort` to not search for exponents as part of the number. |
45 | 58 | For example, with `NOEXP` the number "5.6E5" would be interpreted |
46 | as `5.6`, `"E"`, and `5`. It is the same as setting the old `exp` | |
47 | option to `False`. | |
59 | as `5.6`, `"E"`, and `5`. It is the same as setting the old | |
60 | `exp` option to `False`. | |
48 | 61 | PATH, P |
49 | 62 | Tell `natsort` to interpret strings as filesystem paths, so they |
50 | 63 | will be split according to the filesystem separator |
51 | 64 | (i.e. '/' on UNIX, '\\' on Windows), as well as splitting on the |
52 | 65 | file extension, if any. Without this, lists of file paths like |
53 | ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be sorted | |
54 | properly; 'Folder/' will be placed at the end, not at the front. | |
55 | It is the same as setting the old `as_path` option to `True`. | |
66 | ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be | |
67 | sorted properly; 'Folder/' will be placed at the end, not at the | |
68 | front. It is the same as setting the old `as_path` option to | |
69 | `True`. | |
56 | 70 | LOCALE, L |
57 | 71 | Tell `natsort` to be locale-aware when sorting strings (everything |
58 | 72 | that was not converted to a number). Your sorting results will vary |
71 | 85 | ``['apple', 'banana', 'Apple', 'Banana']`` (the default order |
72 | 86 | would be ``['Apple', 'Banana', 'apple', 'banana']`` which is |
73 | 87 | the order from a purely ordinal sort). |
74 | Useless when used with `IGNORECASE`. | |
88 | Useless when used with `IGNORECASE`. Please note that if used | |
89 | with ``LOCALE``, this actually has the reverse effect and will | |
90 | put uppercase first (this is because ``LOCALE`` already puts | |
91 | lowercase first); you may use this to your advantage if you | |
92 | need to modify the order returned with ``LOCALE``. | |
75 | 93 | GROUPLETTERS, G |
76 | 94 | Tell `natsort` to group lowercase and uppercase letters together |
77 | 95 | when sorting. For example, |
86 | 104 | behavior without `LOCALE`. |
87 | 105 | UNGROUPLETTERS, UG |
88 | 106 | An alias for `CAPITALFIRST`. |
107 | NANLAST, NL | |
108 | If an NaN shows up in the input, this instructs `natsort` to | |
109 | treat these as +Infinity and place them after all the other numbers. | |
110 | By default, an NaN will be treated as -Infinity and be placed first. | |
89 | 111 | TYPESAFE, T |
90 | 112 | Try hard to avoid "unorderable types" error on Python 3. It |
91 | 113 | is the same as setting the old `py3_safe` option to `True`. |
92 | This is only needed if not using ``UNSIGNED`` or if | |
93 | sorting by ``FLOAT``. | |
94 | You shouldn't need to use this unless you are using | |
114 | This is only needed if using ``SIGNED`` or if sorting by | |
115 | ``FLOAT``. You shouldn't need to use this unless you are using | |
95 | 116 | ``natsort_keygen``. *NOTE:* It cannot resolve the ``TypeError`` |
96 | 117 | from trying to compare `str` and `bytes`. |
97 | 118 | |
119 | 140 | |
120 | 141 | |
121 | 142 | # Sort algorithm "enum" values. |
122 | _ns = {'FLOAT': 0, 'F': 0, | |
123 | 'INT': 1, 'I': 1, | |
124 | 'UNSIGNED': 2, 'U': 2, | |
125 | 'VERSION': 3, 'V': 3, # Shortcut for INT | UNSIGNED | |
126 | 'DIGIT': 3, 'D': 3, # Shortcut for INT | UNSIGNED | |
143 | _ns = { | |
144 | 'INT': 0, 'I': 0, | |
145 | 'FLOAT': 1, 'F': 1, | |
146 | 'UNSIGNED': 0, 'U': 0, | |
147 | 'SIGNED': 2, 'S': 2, | |
148 | 'VERSION': 0, 'V': 0, # Shortcut for INT | UNSIGNED | |
149 | 'DIGIT': 0, 'D': 0, # Shortcut for INT | UNSIGNED | |
150 | 'REAL': 3, 'R': 3, # Shortcut for FLOAT | SIGNED | |
127 | 151 | 'NOEXP': 4, 'N': 4, |
128 | 152 | 'PATH': 8, 'P': 8, |
129 | 153 | 'LOCALE': 16, 'L': 16, |
132 | 156 | 'GROUPLETTERS': 128, 'G': 128, |
133 | 157 | 'UNGROUPLETTERS': 256, 'UG': 256, |
134 | 158 | 'CAPITALFIRST': 256, 'C': 256, |
135 | 'TYPESAFE': 1024, 'T': 1024, | |
159 | 'NANLAST': 512, 'NL': 512, | |
160 | 'TYPESAFE': 2048, 'T': 2048, | |
136 | 161 | } |
137 | 162 | # Populate the ns class with the _ns values. |
138 | 163 | for x, y in _ns.items(): |
0 | # -*- coding: utf-8 -*- | |
1 | from __future__ import (print_function, division, | |
2 | unicode_literals, absolute_import) | |
3 | ||
4 | import functools | |
5 | import sys | |
6 | ||
7 | # These functions are used to make the doctests compatible between | |
8 | # python2 and python3. This code is pretty much lifted from the iPython | |
9 | # project's py3compat.py file. Credit to the iPython devs. | |
10 | ||
11 | # Assume all strings are Unicode in Python 2 | |
12 | py23_str = str if sys.version[0] == '3' else unicode | |
13 | ||
14 | # Use the range iterator always | |
15 | py23_range = range if sys.version[0] == '3' else xrange | |
16 | ||
17 | # Uniform base string type | |
18 | py23_basestring = str if sys.version[0] == '3' else basestring | |
19 | ||
20 | # zip as an iterator | |
21 | if sys.version[0] == '3': | |
22 | py23_zip = zip | |
23 | else: | |
24 | import itertools | |
25 | py23_zip = itertools.izip | |
26 | ||
27 | ||
28 | # This function is intended to decorate other functions that will modify | |
29 | # either a string directly, or a function's docstring. | |
30 | def _modify_str_or_docstring(str_change_func): | |
31 | @functools.wraps(str_change_func) | |
32 | def wrapper(func_or_str): | |
33 | if isinstance(func_or_str, py23_basestring): | |
34 | func = None | |
35 | doc = func_or_str | |
36 | else: | |
37 | func = func_or_str | |
38 | doc = func.__doc__ | |
39 | ||
40 | doc = str_change_func(doc) | |
41 | ||
42 | if func: | |
43 | func.__doc__ = doc | |
44 | return func | |
45 | return doc | |
46 | return wrapper | |
47 | ||
48 | ||
49 | # Properly modify a doctstring to either have the unicode literal or not. | |
50 | if sys.version[0] == '3': | |
51 | # Abstract u'abc' syntax: | |
52 | @_modify_str_or_docstring | |
53 | def u_format(s): | |
54 | """"{u}'abc'" --> "'abc'" (Python 3) | |
55 | ||
56 | Accepts a string or a function, so it can be used as a decorator.""" | |
57 | return s.format(u='') | |
58 | else: | |
59 | # Abstract u'abc' syntax: | |
60 | @_modify_str_or_docstring | |
61 | def u_format(s): | |
62 | """"{u}'abc'" --> "u'abc'" (Python 2) | |
63 | ||
64 | Accepts a string or a function, so it can be used as a decorator.""" | |
65 | return s.format(u='u') |
0 | # -*- coding: utf-8 -*- | |
1 | """ | |
2 | Contains all possible non-ASCII unicode numbers. | |
3 | """ | |
4 | from __future__ import ( | |
5 | print_function, | |
6 | division, | |
7 | unicode_literals, | |
8 | absolute_import | |
9 | ) | |
10 | ||
11 | # Std. lib imports. | |
12 | import unicodedata | |
13 | ||
14 | # Local imports. | |
15 | from natsort.compat.py23 import py23_unichr | |
16 | ||
17 | ||
18 | # Rather than determine this on the fly, which would incur a startup | |
19 | # runtime penalty, the hex values of the Unicode numeric characters | |
20 | # are hard-coded below. | |
21 | numeric_hex = [ | |
22 | 0XB2, 0XB3, 0XB9, 0XBC, 0XBD, 0XBE, 0X660, 0X661, 0X662, 0X663, 0X664, | |
23 | 0X665, 0X666, 0X667, 0X668, 0X669, 0X6F0, 0X6F1, 0X6F2, 0X6F3, 0X6F4, | |
24 | 0X6F5, 0X6F6, 0X6F7, 0X6F8, 0X6F9, 0X7C0, 0X7C1, 0X7C2, 0X7C3, 0X7C4, | |
25 | 0X7C5, 0X7C6, 0X7C7, 0X7C8, 0X7C9, 0X966, 0X967, 0X968, 0X969, 0X96A, | |
26 | 0X96B, 0X96C, 0X96D, 0X96E, 0X96F, 0X9E6, 0X9E7, 0X9E8, 0X9E9, 0X9EA, | |
27 | 0X9EB, 0X9EC, 0X9ED, 0X9EE, 0X9EF, 0X9F4, 0X9F5, 0X9F6, 0X9F7, 0X9F8, | |
28 | 0X9F9, 0XA66, 0XA67, 0XA68, 0XA69, 0XA6A, 0XA6B, 0XA6C, 0XA6D, 0XA6E, | |
29 | 0XA6F, 0XAE6, 0XAE7, 0XAE8, 0XAE9, 0XAEA, 0XAEB, 0XAEC, 0XAED, 0XAEE, | |
30 | 0XAEF, 0XB66, 0XB67, 0XB68, 0XB69, 0XB6A, 0XB6B, 0XB6C, 0XB6D, 0XB6E, | |
31 | 0XB6F, 0XB72, 0XB73, 0XB74, 0XB75, 0XB76, 0XB77, 0XBE6, 0XBE7, 0XBE8, | |
32 | 0XBE9, 0XBEA, 0XBEB, 0XBEC, 0XBED, 0XBEE, 0XBEF, 0XBF0, 0XBF1, 0XBF2, | |
33 | 0XC66, 0XC67, 0XC68, 0XC69, 0XC6A, 0XC6B, 0XC6C, 0XC6D, 0XC6E, 0XC6F, | |
34 | 0XC78, 0XC79, 0XC7A, 0XC7B, 0XC7C, 0XC7D, 0XC7E, 0XCE6, 0XCE7, 0XCE8, | |
35 | 0XCE9, 0XCEA, 0XCEB, 0XCEC, 0XCED, 0XCEE, 0XCEF, 0XD66, 0XD67, 0XD68, | |
36 | 0XD69, 0XD6A, 0XD6B, 0XD6C, 0XD6D, 0XD6E, 0XD6F, 0XD70, 0XD71, 0XD72, | |
37 | 0XD73, 0XD74, 0XD75, 0XE50, 0XE51, 0XE52, 0XE53, 0XE54, 0XE55, 0XE56, | |
38 | 0XE57, 0XE58, 0XE59, 0XED0, 0XED1, 0XED2, 0XED3, 0XED4, 0XED5, 0XED6, | |
39 | 0XED7, 0XED8, 0XED9, 0XF20, 0XF21, 0XF22, 0XF23, 0XF24, 0XF25, 0XF26, | |
40 | 0XF27, 0XF28, 0XF29, 0XF2A, 0XF2B, 0XF2C, 0XF2D, 0XF2E, 0XF2F, 0XF30, | |
41 | 0XF31, 0XF32, 0XF33, 0X1040, 0X1041, 0X1042, 0X1043, 0X1044, 0X1045, | |
42 | 0X1046, 0X1047, 0X1048, 0X1049, 0X1090, 0X1091, 0X1092, 0X1093, 0X1094, | |
43 | 0X1095, 0X1096, 0X1097, 0X1098, 0X1099, 0X1369, 0X136A, 0X136B, 0X136C, | |
44 | 0X136D, 0X136E, 0X136F, 0X1370, 0X1371, 0X1372, 0X1373, 0X1374, 0X1375, | |
45 | 0X1376, 0X1377, 0X1378, 0X1379, 0X137A, 0X137B, 0X137C, 0X16EE, 0X16EF, | |
46 | 0X16F0, 0X17E0, 0X17E1, 0X17E2, 0X17E3, 0X17E4, 0X17E5, 0X17E6, 0X17E7, | |
47 | 0X17E8, 0X17E9, 0X17F0, 0X17F1, 0X17F2, 0X17F3, 0X17F4, 0X17F5, 0X17F6, | |
48 | 0X17F7, 0X17F8, 0X17F9, 0X1810, 0X1811, 0X1812, 0X1813, 0X1814, 0X1815, | |
49 | 0X1816, 0X1817, 0X1818, 0X1819, 0X1946, 0X1947, 0X1948, 0X1949, 0X194A, | |
50 | 0X194B, 0X194C, 0X194D, 0X194E, 0X194F, 0X19D0, 0X19D1, 0X19D2, 0X19D3, | |
51 | 0X19D4, 0X19D5, 0X19D6, 0X19D7, 0X19D8, 0X19D9, 0X19DA, 0X1A80, 0X1A81, | |
52 | 0X1A82, 0X1A83, 0X1A84, 0X1A85, 0X1A86, 0X1A87, 0X1A88, 0X1A89, 0X1A90, | |
53 | 0X1A91, 0X1A92, 0X1A93, 0X1A94, 0X1A95, 0X1A96, 0X1A97, 0X1A98, 0X1A99, | |
54 | 0X1B50, 0X1B51, 0X1B52, 0X1B53, 0X1B54, 0X1B55, 0X1B56, 0X1B57, 0X1B58, | |
55 | 0X1B59, 0X1BB0, 0X1BB1, 0X1BB2, 0X1BB3, 0X1BB4, 0X1BB5, 0X1BB6, 0X1BB7, | |
56 | 0X1BB8, 0X1BB9, 0X1C40, 0X1C41, 0X1C42, 0X1C43, 0X1C44, 0X1C45, 0X1C46, | |
57 | 0X1C47, 0X1C48, 0X1C49, 0X1C50, 0X1C51, 0X1C52, 0X1C53, 0X1C54, 0X1C55, | |
58 | 0X1C56, 0X1C57, 0X1C58, 0X1C59, 0X2070, 0X2074, 0X2075, 0X2076, 0X2077, | |
59 | 0X2078, 0X2079, 0X2080, 0X2081, 0X2082, 0X2083, 0X2084, 0X2085, 0X2086, | |
60 | 0X2087, 0X2088, 0X2089, 0X2150, 0X2151, 0X2152, 0X2153, 0X2154, 0X2155, | |
61 | 0X2156, 0X2157, 0X2158, 0X2159, 0X215A, 0X215B, 0X215C, 0X215D, 0X215E, | |
62 | 0X215F, 0X2160, 0X2161, 0X2162, 0X2163, 0X2164, 0X2165, 0X2166, 0X2167, | |
63 | 0X2168, 0X2169, 0X216A, 0X216B, 0X216C, 0X216D, 0X216E, 0X216F, 0X2170, | |
64 | 0X2171, 0X2172, 0X2173, 0X2174, 0X2175, 0X2176, 0X2177, 0X2178, 0X2179, | |
65 | 0X217A, 0X217B, 0X217C, 0X217D, 0X217E, 0X217F, 0X2180, 0X2181, 0X2182, | |
66 | 0X2185, 0X2186, 0X2187, 0X2188, 0X2189, 0X2460, 0X2461, 0X2462, 0X2463, | |
67 | 0X2464, 0X2465, 0X2466, 0X2467, 0X2468, 0X2469, 0X246A, 0X246B, 0X246C, | |
68 | 0X246D, 0X246E, 0X246F, 0X2470, 0X2471, 0X2472, 0X2473, 0X2474, 0X2475, | |
69 | 0X2476, 0X2477, 0X2478, 0X2479, 0X247A, 0X247B, 0X247C, 0X247D, 0X247E, | |
70 | 0X247F, 0X2480, 0X2481, 0X2482, 0X2483, 0X2484, 0X2485, 0X2486, 0X2487, | |
71 | 0X2488, 0X2489, 0X248A, 0X248B, 0X248C, 0X248D, 0X248E, 0X248F, 0X2490, | |
72 | 0X2491, 0X2492, 0X2493, 0X2494, 0X2495, 0X2496, 0X2497, 0X2498, 0X2499, | |
73 | 0X249A, 0X249B, 0X24EA, 0X24EB, 0X24EC, 0X24ED, 0X24EE, 0X24EF, 0X24F0, | |
74 | 0X24F1, 0X24F2, 0X24F3, 0X24F4, 0X24F5, 0X24F6, 0X24F7, 0X24F8, 0X24F9, | |
75 | 0X24FA, 0X24FB, 0X24FC, 0X24FD, 0X24FE, 0X24FF, 0X2776, 0X2777, 0X2778, | |
76 | 0X2779, 0X277A, 0X277B, 0X277C, 0X277D, 0X277E, 0X277F, 0X2780, 0X2781, | |
77 | 0X2782, 0X2783, 0X2784, 0X2785, 0X2786, 0X2787, 0X2788, 0X2789, 0X278A, | |
78 | 0X278B, 0X278C, 0X278D, 0X278E, 0X278F, 0X2790, 0X2791, 0X2792, 0X2793, | |
79 | 0X2CFD, 0X3007, 0X3021, 0X3022, 0X3023, 0X3024, 0X3025, 0X3026, 0X3027, | |
80 | 0X3028, 0X3029, 0X3038, 0X3039, 0X303A, 0X3192, 0X3193, 0X3194, 0X3195, | |
81 | 0X3220, 0X3221, 0X3222, 0X3223, 0X3224, 0X3225, 0X3226, 0X3227, 0X3228, | |
82 | 0X3229, 0X3248, 0X3249, 0X324A, 0X324B, 0X324C, 0X324D, 0X324E, 0X324F, | |
83 | 0X3251, 0X3252, 0X3253, 0X3254, 0X3255, 0X3256, 0X3257, 0X3258, 0X3259, | |
84 | 0X325A, 0X325B, 0X325C, 0X325D, 0X325E, 0X325F, 0X3280, 0X3281, 0X3282, | |
85 | 0X3283, 0X3284, 0X3285, 0X3286, 0X3287, 0X3288, 0X3289, 0X32B1, 0X32B2, | |
86 | 0X32B3, 0X32B4, 0X32B5, 0X32B6, 0X32B7, 0X32B8, 0X32B9, 0X32BA, 0X32BB, | |
87 | 0X32BC, 0X32BD, 0X32BE, 0X32BF, 0X3405, 0X3483, 0X382A, 0X3B4D, 0X4E00, | |
88 | 0X4E03, 0X4E07, 0X4E09, 0X4E5D, 0X4E8C, 0X4E94, 0X4E96, 0X4EBF, 0X4EC0, | |
89 | 0X4EDF, 0X4EE8, 0X4F0D, 0X4F70, 0X5104, 0X5146, 0X5169, 0X516B, 0X516D, | |
90 | 0X5341, 0X5343, 0X5344, 0X5345, 0X534C, 0X53C1, 0X53C2, 0X53C3, 0X53C4, | |
91 | 0X56DB, 0X58F1, 0X58F9, 0X5E7A, 0X5EFE, 0X5EFF, 0X5F0C, 0X5F0D, 0X5F0E, | |
92 | 0X5F10, 0X62FE, 0X634C, 0X67D2, 0X6F06, 0X7396, 0X767E, 0X8086, 0X842C, | |
93 | 0X8CAE, 0X8CB3, 0X8D30, 0X9621, 0X9646, 0X964C, 0X9678, 0X96F6, 0XA620, | |
94 | 0XA621, 0XA622, 0XA623, 0XA624, 0XA625, 0XA626, 0XA627, 0XA628, 0XA629, | |
95 | 0XA6E6, 0XA6E7, 0XA6E8, 0XA6E9, 0XA6EA, 0XA6EB, 0XA6EC, 0XA6ED, 0XA6EE, | |
96 | 0XA6EF, 0XA830, 0XA831, 0XA832, 0XA833, 0XA834, 0XA835, 0XA8D0, 0XA8D1, | |
97 | 0XA8D2, 0XA8D3, 0XA8D4, 0XA8D5, 0XA8D6, 0XA8D7, 0XA8D8, 0XA8D9, 0XA900, | |
98 | 0XA901, 0XA902, 0XA903, 0XA904, 0XA905, 0XA906, 0XA907, 0XA908, 0XA909, | |
99 | 0XA9D0, 0XA9D1, 0XA9D2, 0XA9D3, 0XA9D4, 0XA9D5, 0XA9D6, 0XA9D7, 0XA9D8, | |
100 | 0XA9D9, 0XAA50, 0XAA51, 0XAA52, 0XAA53, 0XAA54, 0XAA55, 0XAA56, 0XAA57, | |
101 | 0XAA58, 0XAA59, 0XABF0, 0XABF1, 0XABF2, 0XABF3, 0XABF4, 0XABF5, 0XABF6, | |
102 | 0XABF7, 0XABF8, 0XABF9, 0XF96B, 0XF973, 0XF978, 0XF9B2, 0XF9D1, 0XF9D3, | |
103 | 0XF9FD, 0XFF10, 0XFF11, 0XFF12, 0XFF13, 0XFF14, 0XFF15, 0XFF16, 0XFF17, | |
104 | 0XFF18, 0XFF19, 0X10107, 0X10108, 0X10109, 0X1010A, 0X1010B, 0X1010C, | |
105 | 0X1010D, 0X1010E, 0X1010F, 0X10110, 0X10111, 0X10112, 0X10113, 0X10114, | |
106 | 0X10115, 0X10116, 0X10117, 0X10118, 0X10119, 0X1011A, 0X1011B, 0X1011C, | |
107 | 0X1011D, 0X1011E, 0X1011F, 0X10120, 0X10121, 0X10122, 0X10123, 0X10124, | |
108 | 0X10125, 0X10126, 0X10127, 0X10128, 0X10129, 0X1012A, 0X1012B, 0X1012C, | |
109 | 0X1012D, 0X1012E, 0X1012F, 0X10130, 0X10131, 0X10132, 0X10133, 0X10140, | |
110 | 0X10141, 0X10142, 0X10143, 0X10144, 0X10145, 0X10146, 0X10147, 0X10148, | |
111 | 0X10149, 0X1014A, 0X1014B, 0X1014C, 0X1014D, 0X1014E, 0X1014F, 0X10150, | |
112 | 0X10151, 0X10152, 0X10153, 0X10154, 0X10155, 0X10156, 0X10157, 0X10158, | |
113 | 0X10159, 0X1015A, 0X1015B, 0X1015C, 0X1015D, 0X1015E, 0X1015F, 0X10160, | |
114 | 0X10161, 0X10162, 0X10163, 0X10164, 0X10165, 0X10166, 0X10167, 0X10168, | |
115 | 0X10169, 0X1016A, 0X1016B, 0X1016C, 0X1016D, 0X1016E, 0X1016F, 0X10170, | |
116 | 0X10171, 0X10172, 0X10173, 0X10174, 0X10175, 0X10176, 0X10177, 0X10178, | |
117 | 0X1018A, 0X10320, 0X10321, 0X10322, 0X10323, 0X10341, 0X1034A, 0X103D1, | |
118 | 0X103D2, 0X103D3, 0X103D4, 0X103D5, 0X104A0, 0X104A1, 0X104A2, 0X104A3, | |
119 | 0X104A4, 0X104A5, 0X104A6, 0X104A7, 0X104A8, 0X104A9, 0X10858, 0X10859, | |
120 | 0X1085A, 0X1085B, 0X1085C, 0X1085D, 0X1085E, 0X1085F, 0X10916, 0X10917, | |
121 | 0X10918, 0X10919, 0X1091A, 0X1091B, 0X10A40, 0X10A41, 0X10A42, 0X10A43, | |
122 | 0X10A44, 0X10A45, 0X10A46, 0X10A47, 0X10A7D, 0X10A7E, 0X10B58, 0X10B59, | |
123 | 0X10B5A, 0X10B5B, 0X10B5C, 0X10B5D, 0X10B5E, 0X10B5F, 0X10B78, 0X10B79, | |
124 | 0X10B7A, 0X10B7B, 0X10B7C, 0X10B7D, 0X10B7E, 0X10B7F, 0X10E60, 0X10E61, | |
125 | 0X10E62, 0X10E63, 0X10E64, 0X10E65, 0X10E66, 0X10E67, 0X10E68, 0X10E69, | |
126 | 0X10E6A, 0X10E6B, 0X10E6C, 0X10E6D, 0X10E6E, 0X10E6F, 0X10E70, 0X10E71, | |
127 | 0X10E72, 0X10E73, 0X10E74, 0X10E75, 0X10E76, 0X10E77, 0X10E78, 0X10E79, | |
128 | 0X10E7A, 0X10E7B, 0X10E7C, 0X10E7D, 0X10E7E, 0X11052, 0X11053, 0X11054, | |
129 | 0X11055, 0X11056, 0X11057, 0X11058, 0X11059, 0X1105A, 0X1105B, 0X1105C, | |
130 | 0X1105D, 0X1105E, 0X1105F, 0X11060, 0X11061, 0X11062, 0X11063, 0X11064, | |
131 | 0X11065, 0X11066, 0X11067, 0X11068, 0X11069, 0X1106A, 0X1106B, 0X1106C, | |
132 | 0X1106D, 0X1106E, 0X1106F, 0X110F0, 0X110F1, 0X110F2, 0X110F3, 0X110F4, | |
133 | 0X110F5, 0X110F6, 0X110F7, 0X110F8, 0X110F9, 0X11136, 0X11137, 0X11138, | |
134 | 0X11139, 0X1113A, 0X1113B, 0X1113C, 0X1113D, 0X1113E, 0X1113F, 0X111D0, | |
135 | 0X111D1, 0X111D2, 0X111D3, 0X111D4, 0X111D5, 0X111D6, 0X111D7, 0X111D8, | |
136 | 0X111D9, 0X116C0, 0X116C1, 0X116C2, 0X116C3, 0X116C4, 0X116C5, 0X116C6, | |
137 | 0X116C7, 0X116C8, 0X116C9, 0X12400, 0X12401, 0X12402, 0X12403, 0X12404, | |
138 | 0X12405, 0X12406, 0X12407, 0X12408, 0X12409, 0X1240A, 0X1240B, 0X1240C, | |
139 | 0X1240D, 0X1240E, 0X1240F, 0X12410, 0X12411, 0X12412, 0X12413, 0X12414, | |
140 | 0X12415, 0X12416, 0X12417, 0X12418, 0X12419, 0X1241A, 0X1241B, 0X1241C, | |
141 | 0X1241D, 0X1241E, 0X1241F, 0X12420, 0X12421, 0X12422, 0X12423, 0X12424, | |
142 | 0X12425, 0X12426, 0X12427, 0X12428, 0X12429, 0X1242A, 0X1242B, 0X1242C, | |
143 | 0X1242D, 0X1242E, 0X1242F, 0X12430, 0X12431, 0X12432, 0X12433, 0X12434, | |
144 | 0X12435, 0X12436, 0X12437, 0X12438, 0X12439, 0X1243A, 0X1243B, 0X1243C, | |
145 | 0X1243D, 0X1243E, 0X1243F, 0X12440, 0X12441, 0X12442, 0X12443, 0X12444, | |
146 | 0X12445, 0X12446, 0X12447, 0X12448, 0X12449, 0X1244A, 0X1244B, 0X1244C, | |
147 | 0X1244D, 0X1244E, 0X1244F, 0X12450, 0X12451, 0X12452, 0X12453, 0X12454, | |
148 | 0X12455, 0X12456, 0X12457, 0X12458, 0X12459, 0X1245A, 0X1245B, 0X1245C, | |
149 | 0X1245D, 0X1245E, 0X1245F, 0X12460, 0X12461, 0X12462, 0X1D360, 0X1D361, | |
150 | 0X1D362, 0X1D363, 0X1D364, 0X1D365, 0X1D366, 0X1D367, 0X1D368, 0X1D369, | |
151 | 0X1D36A, 0X1D36B, 0X1D36C, 0X1D36D, 0X1D36E, 0X1D36F, 0X1D370, 0X1D371, | |
152 | 0X1D7CE, 0X1D7CF, 0X1D7D0, 0X1D7D1, 0X1D7D2, 0X1D7D3, 0X1D7D4, 0X1D7D5, | |
153 | 0X1D7D6, 0X1D7D7, 0X1D7D8, 0X1D7D9, 0X1D7DA, 0X1D7DB, 0X1D7DC, 0X1D7DD, | |
154 | 0X1D7DE, 0X1D7DF, 0X1D7E0, 0X1D7E1, 0X1D7E2, 0X1D7E3, 0X1D7E4, 0X1D7E5, | |
155 | 0X1D7E6, 0X1D7E7, 0X1D7E8, 0X1D7E9, 0X1D7EA, 0X1D7EB, 0X1D7EC, 0X1D7ED, | |
156 | 0X1D7EE, 0X1D7EF, 0X1D7F0, 0X1D7F1, 0X1D7F2, 0X1D7F3, 0X1D7F4, 0X1D7F5, | |
157 | 0X1D7F6, 0X1D7F7, 0X1D7F8, 0X1D7F9, 0X1D7FA, 0X1D7FB, 0X1D7FC, 0X1D7FD, | |
158 | 0X1D7FE, 0X1D7FF, 0X1F100, 0X1F101, 0X1F102, 0X1F103, 0X1F104, 0X1F105, | |
159 | 0X1F106, 0X1F107, 0X1F108, 0X1F109, 0X1F10A, 0X20001, 0X20064, 0X200E2, | |
160 | 0X20121, 0X2092A, 0X20983, 0X2098C, 0X2099C, 0X20AEA, 0X20AFD, 0X20B19, | |
161 | 0X22390, 0X22998, 0X23B1B, 0X2626D, 0X2F890, | |
162 | ] | |
163 | ||
# Turn every hard-coded code point into its literal Unicode character.
# A ValueError from the conversion (as happens on a narrow Unicode build)
# ends the loop, and any character that unicodedata does not report as
# numeric on this Python version is left out.
numeric_chars = []
for code_point in numeric_hex:
    try:
        char = py23_unichr(code_point)
    except ValueError:
        break
    if unicodedata.numeric(char, None) is not None:
        numeric_chars.append(char)

# The digit characters are the numerals that unicodedata also
# recognizes as digits.
digit_chars = []
for char in numeric_chars:
    if unicodedata.digit(char, None) is not None:
        digit_chars.append(char)

# Collapse each list into a single string.
numeric = ''.join(numeric_chars)
digits = ''.join(digit_chars)
1 | 1 | """ |
2 | 2 | Utilities and definitions for natsort, mostly all used to define |
3 | 3 | the _natsort_key function. |
4 | ||
5 | 4 | """ |
6 | ||
7 | from __future__ import (print_function, division, | |
8 | unicode_literals, absolute_import) | |
5 | from __future__ import ( | |
6 | print_function, | |
7 | division, | |
8 | unicode_literals, | |
9 | absolute_import | |
10 | ) | |
9 | 11 | |
10 | 12 | # Std. lib imports. |
11 | 13 | import re |
14 | from math import isnan | |
12 | 15 | from warnings import warn |
13 | 16 | from os import curdir, pardir |
14 | 17 | from os.path import split, splitext |
16 | 19 | from locale import localeconv |
17 | 20 | |
18 | 21 | # Local imports. |
19 | from natsort.locale_help import locale_convert, grouper, null_string | |
20 | from natsort.py23compat import py23_str, py23_zip | |
21 | 22 | from natsort.ns_enum import ns, _ns |
22 | ||
23 | # If the user has fastnumbers installed, they will get great speed | |
24 | # benefits. If not, we simulate the functions here. | |
25 | try: | |
26 | from fastnumbers import fast_float, fast_int, isreal | |
27 | except ImportError: | |
28 | from natsort.fake_fastnumbers import fast_float, fast_int, isreal | |
29 | ||
30 | # If the user has pathlib installed, the ns.PATH option will convert | |
31 | # Path objects to str before sorting. | |
32 | try: | |
33 | from pathlib import PurePath # PurePath is the base object for Paths. | |
34 | except ImportError: | |
35 | PurePath = object # To avoid NameErrors. | |
36 | has_pathlib = False | |
37 | else: | |
38 | has_pathlib = True | |
23 | from natsort.unicode_numbers import digits, numeric | |
24 | from natsort.locale_help import locale_convert, grouper | |
25 | from natsort.compat.pathlib import PurePath, has_pathlib | |
26 | from natsort.compat.py23 import ( | |
27 | py23_str, | |
28 | py23_zip, | |
29 | PY_VERSION, | |
30 | ) | |
31 | from natsort.compat.locale import ( | |
32 | dumb_sort, | |
33 | use_pyicu, | |
34 | null_string, | |
35 | ) | |
36 | from natsort.compat.fastnumbers import ( | |
37 | fast_float, | |
38 | fast_int, | |
39 | isint, | |
40 | isfloat, | |
41 | ) | |
39 | 42 | |
40 | 43 | # Group algorithm types for easy extraction |
41 | _NUMBER_ALGORITHMS = ns.FLOAT | ns.INT | ns.UNSIGNED | ns.NOEXP | |
42 | _ALL_BUT_PATH = (ns.F | ns.I | ns.U | ns.N | ns.L | | |
44 | _NUMBER_ALGORITHMS = ns.FLOAT | ns.INT | ns.UNSIGNED | ns.SIGNED | ns.NOEXP | |
45 | _ALL_BUT_PATH = (ns.F | ns.I | ns.U | ns.S | ns.N | ns.L | | |
43 | 46 | ns.IC | ns.LF | ns.G | ns.UG | ns.TYPESAFE) |
44 | 47 | |
45 | # The regex that locates floats | |
46 | _float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U) | |
47 | _float_nosign_exp_re = re.compile(r'(\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U) | |
48 | _float_sign_noexp_re = re.compile(r'([-+]?\d*\.?\d+)', re.U) | |
49 | _float_nosign_noexp_re = re.compile(r'(\d*\.?\d+)', re.U) | |
50 | _float_sign_exp_re_c = re.compile(r'([-+]?\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U) | |
51 | _float_nosign_exp_re_c = re.compile(r'(\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U) | |
52 | _float_sign_noexp_re_c = re.compile(r'([-+]?\d*[.,]?\d+)', re.U) | |
53 | _float_nosign_noexp_re_c = re.compile(r'(\d*[.,]?\d+)', re.U) | |
54 | ||
55 | # Integer regexes | |
56 | _int_nosign_re = re.compile(r'(\d+)', re.U) | |
57 | _int_sign_re = re.compile(r'([-+]?\d+)', re.U) | |
48 | # The regex that locates floats - include Unicode numerals. | |
49 | _float_sign_exp_re = r'([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?|[{0}])' | |
50 | _float_sign_exp_re = _float_sign_exp_re.format(numeric) | |
51 | _float_sign_exp_re = re.compile(_float_sign_exp_re, flags=re.U) | |
52 | _float_nosign_exp_re = r'([0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?|[{0}])' | |
53 | _float_nosign_exp_re = _float_nosign_exp_re.format(numeric) | |
54 | _float_nosign_exp_re = re.compile(_float_nosign_exp_re, flags=re.U) | |
55 | _float_sign_noexp_re = r'([-+]?[0-9]*\.?[0-9]+|[{0}])' | |
56 | _float_sign_noexp_re = _float_sign_noexp_re.format(numeric) | |
57 | _float_sign_noexp_re = re.compile(_float_sign_noexp_re, flags=re.U) | |
58 | _float_nosign_noexp_re = r'([0-9]*\.?[0-9]+|[{0}])' | |
59 | _float_nosign_noexp_re = _float_nosign_noexp_re.format(numeric) | |
60 | _float_nosign_noexp_re = re.compile(_float_nosign_noexp_re, flags=re.U) | |
61 | _float_sign_exp_re_c = r'([-+]?[0-9]*[.,]?[0-9]+(?:[eE][-+]?[0-9]+)?)|[{0}]' | |
62 | _float_sign_exp_re_c = _float_sign_exp_re_c.format(numeric) | |
63 | _float_sign_exp_re_c = re.compile(_float_sign_exp_re_c, flags=re.U) | |
64 | _float_nosign_exp_re_c = r'([0-9]*[.,]?[0-9]+(?:[eE][-+]?[0-9]+)?|[{0}])' | |
65 | _float_nosign_exp_re_c = _float_nosign_exp_re_c.format(numeric) | |
66 | _float_nosign_exp_re_c = re.compile(_float_nosign_exp_re_c, flags=re.U) | |
67 | _float_sign_noexp_re_c = r'([-+]?[0-9]*[.,]?[0-9]+|[{0}])' | |
68 | _float_sign_noexp_re_c = _float_sign_noexp_re_c.format(numeric) | |
69 | _float_sign_noexp_re_c = re.compile(_float_sign_noexp_re_c, flags=re.U) | |
70 | _float_nosign_noexp_re_c = r'([0-9]*[.,]?[0-9]+|[{0}])' | |
71 | _float_nosign_noexp_re_c = _float_nosign_noexp_re_c.format(numeric) | |
72 | _float_nosign_noexp_re_c = re.compile(_float_nosign_noexp_re_c, flags=re.U) | |
73 | ||
74 | # Integer regexes - include Unicode digits. | |
75 | _int_nosign_re = r'([0-9]+|[{0}])'.format(digits) | |
76 | _int_nosign_re = re.compile(_int_nosign_re, flags=re.U) | |
77 | _int_sign_re = r'([-+]?[0-9]+|[{0}])'.format(digits) | |
78 | _int_sign_re = re.compile(_int_sign_re, flags=re.U) | |
58 | 79 | |
59 | 80 | # This dict will help select the correct regex and number conversion function. |
60 | 81 | _regex_and_num_function_chooser = { |
61 | (ns.F, '.'): (_float_sign_exp_re, fast_float), | |
62 | (ns.F | ns.N, '.'): (_float_sign_noexp_re, fast_float), | |
82 | (ns.F | ns.S, '.'): (_float_sign_exp_re, fast_float), | |
83 | (ns.F | ns.S | ns.N, '.'): (_float_sign_noexp_re, fast_float), | |
63 | 84 | (ns.F | ns.U, '.'): (_float_nosign_exp_re, fast_float), |
64 | 85 | (ns.F | ns.U | ns.N, '.'): (_float_nosign_noexp_re, fast_float), |
65 | (ns.I, '.'): (_int_sign_re, fast_int), | |
66 | (ns.I | ns.N, '.'): (_int_sign_re, fast_int), | |
86 | (ns.I | ns.S, '.'): (_int_sign_re, fast_int), | |
87 | (ns.I | ns.S | ns.N, '.'): (_int_sign_re, fast_int), | |
67 | 88 | (ns.I | ns.U, '.'): (_int_nosign_re, fast_int), |
68 | 89 | (ns.I | ns.U | ns.N, '.'): (_int_nosign_re, fast_int), |
69 | (ns.F, ','): (_float_sign_exp_re_c, fast_float), | |
70 | (ns.F | ns.N, ','): (_float_sign_noexp_re_c, fast_float), | |
90 | (ns.F | ns.S, ','): (_float_sign_exp_re_c, fast_float), | |
91 | (ns.F | ns.S | ns.N, ','): (_float_sign_noexp_re_c, fast_float), | |
71 | 92 | (ns.F | ns.U, ','): (_float_nosign_exp_re_c, fast_float), |
72 | 93 | (ns.F | ns.U | ns.N, ','): (_float_nosign_noexp_re_c, fast_float), |
73 | (ns.I, ','): (_int_sign_re, fast_int), | |
74 | (ns.I | ns.N, ','): (_int_sign_re, fast_int), | |
94 | (ns.I | ns.S, ','): (_int_sign_re, fast_int), | |
95 | (ns.I | ns.S | ns.N, ','): (_int_sign_re, fast_int), | |
75 | 96 | (ns.I | ns.U, ','): (_int_nosign_re, fast_int), |
76 | 97 | (ns.I | ns.U | ns.N, ','): (_int_nosign_re, fast_int), |
77 | 98 | } |
99 | ||
100 | # Dict to select checker function from converter function | |
101 | _conv_to_check = {fast_float: isfloat, fast_int: isint} | |
78 | 102 | |
79 | 103 | |
80 | 104 | def _do_decoding(s, encoding): |
87 | 111 | return s |
88 | 112 | |
89 | 113 | |
90 | def _args_to_enum(number_type, signed, exp, as_path, py3_safe): | |
114 | def _args_to_enum(**kwargs): | |
91 | 115 | """A function to convert input booleans to an enum-type argument.""" |
92 | 116 | alg = 0 |
93 | if number_type is not float: | |
117 | keys = ('number_type', 'signed', 'exp', 'as_path', 'py3_safe') | |
118 | if any(x not in keys for x in kwargs): | |
119 | x = set(kwargs) - set(keys) | |
120 | raise TypeError('Invalid argument(s): ' + ', '.join(x)) | |
121 | if 'number_type' in kwargs and kwargs['number_type'] is not int: | |
94 | 122 | msg = "The 'number_type' argument is deprecated as of 3.5.0, " |
95 | 123 | msg += "please use 'alg=ns.FLOAT', 'alg=ns.INT', or 'alg=ns.VERSION'" |
96 | 124 | warn(msg, DeprecationWarning) |
97 | alg |= (_ns['INT'] * bool(number_type in (int, None))) | |
98 | alg |= (_ns['UNSIGNED'] * (number_type is None)) | |
99 | if signed is not None: | |
125 | alg |= (_ns['FLOAT'] * bool(kwargs['number_type'] is float)) | |
126 | alg |= (_ns['INT'] * bool(kwargs['number_type'] in (int, None))) | |
127 | alg |= (_ns['SIGNED'] * (kwargs['number_type'] not in (float, None))) | |
128 | if 'signed' in kwargs and kwargs['signed'] is not None: | |
100 | 129 | msg = "The 'signed' argument is deprecated as of 3.5.0, " |
101 | msg += "please use 'alg=ns.UNSIGNED'." | |
102 | warn(msg, DeprecationWarning) | |
103 | alg |= (_ns['UNSIGNED'] * (not signed)) | |
104 | if exp is not None: | |
130 | msg += "please use 'alg=ns.SIGNED'." | |
131 | warn(msg, DeprecationWarning) | |
132 | alg |= (_ns['SIGNED'] * bool(kwargs['signed'])) | |
133 | if 'exp' in kwargs and kwargs['exp'] is not None: | |
105 | 134 | msg = "The 'exp' argument is deprecated as of 3.5.0, " |
106 | 135 | msg += "please use 'alg=ns.NOEXP'." |
107 | 136 | warn(msg, DeprecationWarning) |
108 | alg |= (_ns['NOEXP'] * (not exp)) | |
109 | if as_path is not None: | |
137 | alg |= (_ns['NOEXP'] * (not kwargs['exp'])) | |
138 | if 'as_path' in kwargs and kwargs['as_path'] is not None: | |
110 | 139 | msg = "The 'as_path' argument is deprecated as of 3.5.0, " |
111 | 140 | msg += "please use 'alg=ns.PATH'." |
112 | 141 | warn(msg, DeprecationWarning) |
113 | alg |= (_ns['PATH'] * as_path) | |
114 | if py3_safe is not None: | |
142 | alg |= (_ns['PATH'] * kwargs['as_path']) | |
143 | if 'py3_safe' in kwargs and kwargs['py3_safe'] is not None: | |
115 | 144 | msg = "The 'py3_safe' argument is deprecated as of 3.5.0, " |
116 | 145 | msg += "please use 'alg=ns.TYPESAFE'." |
117 | 146 | warn(msg, DeprecationWarning) |
118 | alg |= (_ns['TYPESAFE'] * py3_safe) | |
147 | alg |= (_ns['TYPESAFE'] * kwargs['py3_safe']) | |
119 | 148 | return alg |
120 | 149 | |
121 | 150 | |
122 | 151 | def _number_extracter(s, regex, numconv, py3_safe, use_locale, group_letters): |
123 | 152 | """Helper to separate the string input into numbers and strings.""" |
153 | conv_check = (numconv, _conv_to_check[numconv]) | |
124 | 154 | |
125 | 155 | # Split the input string by numbers. |
126 | 156 | # If the input is not a string, TypeError is raised. |
130 | 160 | # Take into account locale if needed, and group letters if needed. |
131 | 161 | # Remove empty strings from the list. |
132 | 162 | if use_locale: |
133 | s = [locale_convert(x, numconv, group_letters) for x in s if x] | |
163 | s = [locale_convert(x, conv_check, group_letters) for x in s if x] | |
134 | 164 | elif group_letters: |
135 | s = [grouper(x, numconv) for x in s if x] | |
165 | s = [grouper(x, conv_check) for x in s if x] | |
136 | 166 | else: |
137 | 167 | s = [numconv(x) for x in s if x] |
138 | 168 | |
139 | 169 | # If the list begins with a number, lead with an empty string. |
140 | 170 | # This is used to get around the "unorderable types" issue. |
141 | if not s: # Return empty tuple for empty results. | |
142 | return () | |
143 | elif isreal(s[0]): | |
171 | if not s: # Return empty list for empty results. | |
172 | return [] | |
173 | elif conv_check[1](s[0], num_only=True): | |
144 | 174 | s = [null_string if use_locale else ''] + s |
145 | 175 | |
146 | 176 | # The _py3_safe function inserts "" between numbers in the list, |
147 | 177 | # and is used to get around "unorderable types" in complex cases. |
148 | 178 | # It is a separate function that needs to be requested specifically |
149 | 179 | # because it is expensive to call. |
150 | return _py3_safe(s, use_locale) if py3_safe else s | |
180 | return _py3_safe(s, use_locale, conv_check[1]) if py3_safe else s | |
151 | 181 | |
152 | 182 | |
153 | 183 | def _path_splitter(s, _d_match=re.compile(r'\.\d').match): |
157 | 187 | # Convert a pathlib PurePath object to a string. |
158 | 188 | if has_pathlib and isinstance(s, PurePath): |
159 | 189 | path_location = str(s) |
160 | else: | |
190 | else: # pragma: no cover | |
161 | 191 | path_location = s |
162 | 192 | |
163 | 193 | # Continue splitting the path from the back until we have reached |
198 | 228 | return path_parts + base_parts |
199 | 229 | |
200 | 230 | |
201 | def _py3_safe(parsed_list, use_locale): | |
231 | def _py3_safe(parsed_list, use_locale, check): | |
202 | 232 | """Insert '' between two numbers.""" |
203 | 233 | length = len(parsed_list) |
204 | 234 | if length < 2: |
208 | 238 | nl_append = new_list.append |
209 | 239 | for before, after in py23_zip(islice(parsed_list, 0, length-1), |
210 | 240 | islice(parsed_list, 1, None)): |
211 | if isreal(before) and isreal(after): | |
241 | if check(before, num_only=True) and check(after, num_only=True): | |
212 | 242 | nl_append(null_string if use_locale else '') |
213 | 243 | nl_append(after) |
214 | 244 | return new_list |
245 | ||
246 | ||
def _fix_nan(ret, alg):
    """Return *ret* as a tuple with every NaN swapped for an infinity.

    A NaN element becomes ``float('+inf')`` when the NANLAST bit is set
    in *alg*, and ``float('-inf')`` otherwise. All other elements are
    passed through unchanged.
    """
    def _swap(value):
        # Only genuine float NaN values are replaced.
        if not (isfloat(value, num_only=True) and isnan(value)):
            return value
        return float('+inf') if alg & _ns['NANLAST'] else float('-inf')

    return tuple(_swap(item) for item in ret)
215 | 259 | |
216 | 260 | |
217 | 261 | def _natsort_key(val, key, alg): |
274 | 318 | |
275 | 319 | # Assume the input are strings, which is the most common case. |
276 | 320 | # Apply the string modification if needed. |
321 | orig_val = val | |
277 | 322 | try: |
278 | if alg & _ns['LOWERCASEFIRST']: | |
323 | lowfirst = alg & _ns['LOWERCASEFIRST'] | |
324 | dumb = dumb_sort() if use_locale else False | |
325 | if use_locale and dumb and not lowfirst: | |
326 | val = val.swapcase() # Compensate for bad locale lib. | |
327 | elif lowfirst and not (use_locale and dumb): | |
279 | 328 | val = val.swapcase() |
280 | 329 | if alg & _ns['IGNORECASE']: |
281 | val = val.lower() | |
282 | if use_locale and alg & _ns['UNGROUPLETTERS'] and val[0].isupper(): | |
283 | val = ' ' + val | |
284 | return tuple(_number_extracter(val, | |
285 | regex, | |
286 | num_function, | |
287 | alg & _ns['TYPESAFE'], | |
288 | use_locale, | |
289 | alg & _ns['GROUPLETTERS'])) | |
330 | val = val.casefold() if PY_VERSION >= 3.3 else val.lower() | |
331 | gl = alg & _ns['GROUPLETTERS'] | |
332 | ret = tuple(_number_extracter(val, | |
333 | regex, | |
334 | num_function, | |
335 | alg & _ns['TYPESAFE'], | |
336 | use_locale, | |
337 | gl or (use_locale and dumb))) | |
338 | # Handle NaN. | |
339 | if any(isfloat(x, num_only=True) and isnan(x) for x in ret): | |
340 | ret = _fix_nan(ret, alg) | |
341 | # For UNGROUPLETTERS, so the high level grouping can occur | |
342 | # based on the first letter of the string. | |
343 | # Do no locale transformation of the characters. | |
344 | if use_locale and alg & _ns['UNGROUPLETTERS']: | |
345 | if not ret: | |
346 | return (ret, ret) | |
347 | elif ret[0] == null_string: | |
348 | return ((b'' if use_pyicu else '',), ret) | |
349 | elif dumb: | |
350 | if lowfirst: | |
351 | return ((orig_val[0].swapcase(),), ret) | |
352 | else: | |
353 | return ((orig_val[0],), ret) | |
354 | else: | |
355 | return ((val[0],), ret) | |
356 | else: | |
357 | return ret | |
290 | 358 | except (TypeError, AttributeError): |
291 | 359 | # Check if it is a bytes type, and if so return as a |
292 | 360 | # one element tuple. |
293 | 361 | if type(val) in (bytes,): |
294 | return (val,) | |
362 | return (val.lower(),) if alg & _ns['IGNORECASE'] else (val,) | |
295 | 363 | # If not strings, assume it is an iterable that must |
296 | 364 | # be parsed recursively. Do not apply the key recursively. |
297 | 365 | # If this string was split as a path, turn off 'PATH'. |
304 | 372 | # Return as-is, with a leading empty string. |
305 | 373 | except TypeError: |
306 | 374 | n = null_string if use_locale else '' |
375 | if isfloat(val, num_only=True) and isnan(val): | |
376 | val = _fix_nan([val], alg)[0] | |
307 | 377 | return ((n, val,),) if alg & _ns['PATH'] else (n, val,) |
5 | 5 | |
6 | 6 | [pytest] |
7 | 7 | flakes-ignore = |
8 | natsort/py23compat.py UndefinedName | |
8 | natsort/compat/py23.py UndefinedName | |
9 | 9 | natsort/__init__.py UnusedImport |
10 | natsort/compat/* UnusedImport | |
10 | 11 | docs/source/conf.py ALL |
11 | 12 | test_natsort/test_natsort.py UnusedImport RedefinedWhileUnused |
12 | 13 | test_natsort/test_locale_help.py UnusedImport RedefinedWhileUnused |
14 | test_natsort/compat/* UnusedImport | |
13 | 15 | |
14 | 16 | pep8ignore = |
17 | natsort/ns_enum.py E126 E241 E123 | |
15 | 18 | test_natsort/test_natsort.py E501 E241 E221 |
16 | 19 | test_natsort/test_utils.py E501 E241 E221 |
17 | 20 | test_natsort/test_locale_help.py E501 E241 E221 |
18 | 21 | test_natsort/test_main.py E501 E241 E221 |
19 | 22 | test_natsort/profile_natsorted.py ALL |
20 | 23 | docs/source/conf.py ALL |
24 | ||
25 | [flake8] | |
26 | max-line-length = 160 | |
27 | ignore = E231,E302 |
22 | 22 | import pytest |
23 | 23 | err1 = pytest.main(['--cov', 'natsort', |
24 | 24 | '--cov-report', 'term-missing', |
25 | '--flakes', '--pep8']) | |
25 | '--flakes', | |
26 | '--pep8', | |
27 | '-s', | |
28 | # '--failed', | |
29 | # '-v', | |
30 | ]) | |
26 | 31 | err2 = pytest.main(['--doctest-modules', 'natsort']) |
27 | 32 | err3 = pytest.main(['README.rst', |
28 | 33 | 'docs/source/intro.rst', |
55 | 60 | REQUIRES = 'argparse' if sys.version[:3] in ('2.6', '3.0', '3.1') else '' |
56 | 61 | |
57 | 62 | # Testing needs pytest, and mock if less than python 3.3 |
58 | TESTS_REQUIRE = ['pytest', 'pytest-pep8', 'pytest-flakes', 'pytest-cov'] | |
59 | if sys.version[0] == 2 or (sys.version[3] == '3' and int(sys.version[2]) < 3): | |
63 | TESTS_REQUIRE = ['pytest', 'pytest-pep8', 'pytest-flakes', | |
64 | 'pytest-cov', 'pytest-cache', 'hypothesis'] | |
65 | ||
66 | if (sys.version.startswith('2') or | |
67 | (sys.version.startswith('3') and int(sys.version.split('.')[1]) < 3)): | |
60 | 68 | TESTS_REQUIRE.append('mock') |
69 | if (sys.version.startswith('2') or | |
70 | (sys.version.startswith('3') and int(sys.version.split('.')[1]) < 4)): | |
71 | TESTS_REQUIRE.append('pathlib') | |
61 | 72 | |
62 | 73 | # The setup parameters |
63 | 74 | setup( |
68 | 79 | url='https://github.com/SethMMorton/natsort', |
69 | 80 | license='MIT', |
70 | 81 | install_requires=REQUIRES, |
71 | packages=['natsort'], | |
82 | packages=['natsort', 'natsort.compat'], | |
72 | 83 | entry_points={'console_scripts': ['natsort = natsort.__main__:main']}, |
73 | 84 | tests_require=TESTS_REQUIRE, |
74 | 85 | cmdclass={'test': PyTest}, |
0 | # -*- coding: utf-8 -*- | |
1 | from __future__ import ( | |
2 | print_function, | |
3 | division, | |
4 | unicode_literals, | |
5 | absolute_import | |
6 | ) | |
7 | import sys | |
8 | import compat.mock | |
9 | ||
major_minor = sys.version_info[:2]

# hypothesis is incompatible with python 2.6, so on that version every
# name that would come from it is bound to one shared MagicMock instead.
if major_minor == (2, 6):
    given = assume = sampled_from = integers_from = \
        integers_in_range = example = compat.mock.MagicMock()
    use_hypothesis = False
# On any other version, use the real hypothesis library.
else:
    use_hypothesis = True
    from hypothesis import assume, given, example
    from hypothesis.specifiers import (
        integers_in_range,
        integers_from,
        sampled_from,
    )
0 | # -*- coding: utf-8 -*- | |
1 | from __future__ import ( | |
2 | print_function, | |
3 | division, | |
4 | unicode_literals, | |
5 | absolute_import | |
6 | ) | |
7 | ||
8 | # Std. lib imports. | |
9 | import locale | |
10 | ||
11 | # Local imports | |
12 | from natsort.locale_help import use_pyicu | |
13 | from natsort.compat.py23 import py23_str | |
14 | ||
15 | ||
16 | def load_locale(x): | |
17 | """ Convenience to load a locale, trying ISO8859-1 first and falling back to UTF-8.""" | |
18 | try: | |
19 | locale.setlocale(locale.LC_ALL, str('{0}.ISO8859-1'.format(x))) | |
20 | except: | |
21 | locale.setlocale(locale.LC_ALL, str('{0}.UTF-8'.format(x))) | |
22 | ||
23 | # Check if de_DE is installed. | |
24 | try: | |
25 | load_locale('de_DE') | |
26 | has_locale_de_DE = True | |
27 | except locale.Error: | |
28 | has_locale_de_DE = False | |
29 | ||
30 | # Make a function that will return the appropriate | |
31 | # strxfrm for the current locale. | |
32 | if use_pyicu: | |
33 | from natsort.locale_help import get_pyicu_transform | |
34 | from locale import getlocale | |
35 | ||
36 | def get_strxfrm(): | |
37 | return get_pyicu_transform(getlocale()) | |
38 | else: | |
39 | from natsort.locale_help import strxfrm | |
40 | ||
41 | def get_strxfrm(): | |
42 | return strxfrm | |
43 | ||
44 | # Depending on the python version, use lower or casefold | |
45 | # to make a string lowercase. | |
46 | try: | |
47 | low = py23_str.casefold | |
48 | except AttributeError: | |
49 | low = py23_str.lower |
0 | # -*- coding: utf-8 -*- | |
1 | from __future__ import ( | |
2 | print_function, | |
3 | division, | |
4 | unicode_literals, | |
5 | absolute_import | |
6 | ) | |
7 | # Load mock functions from the right place. | |
8 | try: | |
9 | from unittest.mock import MagicMock, patch, call | |
10 | except ImportError: | |
11 | from mock import MagicMock, patch, call |
9 | 9 | |
10 | 10 | sys.path.insert(0, '.') |
11 | 11 | from natsort import natsorted, index_natsorted |
12 | from natsort.py23compat import py23_range | |
12 | from natsort.compat.py23 import py23_range | |
13 | 13 | |
14 | 14 | |
15 | 15 | # Sample lists to sort |
0 | # -*- coding: utf-8 -*- | |
1 | """Alternate versions of the splitting functions for testing.""" | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import unicodedata | |
5 | from natsort.compat.py23 import PY_VERSION | |
6 | ||
7 | if PY_VERSION >= 3.0: | |
8 | long = int | |
9 | ||
10 | ||
11 | def int_splitter(x, signed, safe, sep): | |
12 | """Alternate (slow) method to split a string into numbers.""" | |
13 | if not x: | |
14 | return [] | |
15 | all_digits = set('0123456789') | |
16 | full_list, strings, nums = [], [], [] | |
17 | input_len = len(x) | |
18 | for i, char in enumerate(x): | |
19 | # If this character is a sign and the next is a number, | |
20 | # start a new number. | |
21 | if (i+1 < input_len and signed and | |
22 | (char in '-+') and (x[i+1] in all_digits)): | |
23 | # Reset any current string or number. | |
24 | if strings: | |
25 | full_list.append(''.join(strings)) | |
26 | if nums: | |
27 | full_list.append(int(''.join(nums))) | |
28 | strings = [] | |
29 | nums = [char] | |
30 | # If this is a number, add to the number list. | |
31 | elif char in all_digits: | |
32 | nums.append(char) | |
33 | # Reset any string. | |
34 | if strings: | |
35 | full_list.append(''.join(strings)) | |
36 | strings = [] | |
37 | # If this is a unicode digit, append directly to the full list. | |
38 | elif char.isdigit(): | |
39 | # Reset any string or number. | |
40 | if strings: | |
41 | full_list.append(''.join(strings)) | |
42 | if nums: | |
43 | full_list.append(int(''.join(nums))) | |
44 | strings = [] | |
45 | nums = [] | |
46 | full_list.append(unicodedata.digit(char)) | |
47 | # Otherwise add to the string. | |
48 | else: | |
49 | strings.append(char) | |
50 | # Reset any number. | |
51 | if nums: | |
52 | full_list.append(int(''.join(nums))) | |
53 | nums = [] | |
54 | if nums: | |
55 | full_list.append(int(''.join(nums))) | |
56 | elif strings: | |
57 | full_list.append(''.join(strings)) | |
58 | if safe: | |
59 | full_list = sep_inserter(full_list, (int, long), sep) | |
60 | if type(full_list[0]) in (int, long): | |
61 | return [sep] + full_list | |
62 | else: | |
63 | return full_list | |
64 | ||
65 | ||
66 | def float_splitter(x, signed, exp, safe, sep): | |
67 | """Alternate (slow) method to split a string into numbers.""" | |
68 | if not x: | |
69 | return [] | |
70 | all_digits = set('0123456789') | |
71 | full_list, strings, nums = [], [], [] | |
72 | input_len = len(x) | |
73 | for i, char in enumerate(x): | |
74 | # If this character is a sign and the next is a number, | |
75 | # start a new number. | |
76 | if (i+1 < input_len and | |
77 | (signed or (i > 1 and exp and x[i-1] in 'eE' and | |
78 | x[i-2] in all_digits)) and | |
79 | (char in '-+') and (x[i+1] in all_digits)): | |
80 | # Reset any current string or number. | |
81 | if strings: | |
82 | full_list.append(''.join(strings)) | |
83 | if nums and i > 0 and x[i-1] not in 'eE': | |
84 | full_list.append(float(''.join(nums))) | |
85 | nums = [char] | |
86 | else: | |
87 | nums.append(char) | |
88 | strings = [] | |
89 | # If this is a number, add to the number list. | |
90 | elif char in all_digits: | |
91 | nums.append(char) | |
92 | # Reset any string. | |
93 | if strings: | |
94 | full_list.append(''.join(strings)) | |
95 | strings = [] | |
96 | # If this is a decimal, add to the number list. | |
97 | elif (i + 1 < input_len and char == '.' and x[i+1] in all_digits): | |
98 | if nums and '.' in nums: | |
99 | full_list.append(float(''.join(nums))) | |
100 | nums = [] | |
101 | nums.append(char) | |
102 | if strings: | |
103 | full_list.append(''.join(strings)) | |
104 | strings = [] | |
105 | # If this is an exponent, add to the number list. | |
106 | elif (i > 0 and i + 1 < input_len and exp and char in 'eE' and | |
107 | x[i-1] in all_digits and x[i+1] in all_digits | set('+-')): | |
108 | if 'e' in nums or 'E' in nums: | |
109 | strings = [char] | |
110 | full_list.append(float(''.join(nums))) | |
111 | nums = [] | |
112 | else: | |
113 | nums.append(char) | |
114 | # If this is a unicode numeric character, append its numeric value directly to the full list. | |
115 | elif unicodedata.numeric(char, None) is not None: | |
116 | # Reset any string or number. | |
117 | if strings: | |
118 | full_list.append(''.join(strings)) | |
119 | if nums: | |
120 | full_list.append(float(''.join(nums))) | |
121 | strings = [] | |
122 | nums = [] | |
123 | full_list.append(unicodedata.numeric(char)) | |
124 | # Otherwise add to the string. | |
125 | else: | |
126 | strings.append(char) | |
127 | # Reset any number. | |
128 | if nums: | |
129 | full_list.append(float(''.join(nums))) | |
130 | nums = [] | |
131 | if nums: | |
132 | full_list.append(float(''.join(nums))) | |
133 | elif strings: | |
134 | full_list.append(''.join(strings)) | |
135 | # Fix a float that looks like a string. | |
136 | fstrings = ('inf', 'infinity', '-inf', '-infinity', | |
137 | '+inf', '+infinity', 'nan') | |
138 | full_list = [float(y) if type(y) != float and y.lower() in fstrings else y | |
139 | for y in full_list] | |
140 | if safe: | |
141 | full_list = sep_inserter(full_list, (float,), sep) | |
142 | if type(full_list[0]) == float: | |
143 | return [sep] + full_list | |
144 | else: | |
145 | return full_list | |
146 | ||
147 | ||
148 | def sep_inserter(x, t, sep): | |
149 | # Simulates the py3_safe function. | |
150 | ret = [x[0]] | |
151 | for i, y in enumerate(x[1:]): | |
152 | if type(y) in t and type(x[i]) in t: | |
153 | ret.append(sep) | |
154 | ret.append(y) | |
155 | return ret |
7 | 7 | from copy import copy |
8 | 8 | from pytest import fail |
9 | 9 | from natsort import natsorted |
10 | from natsort.py23compat import py23_range | |
10 | from natsort.compat.py23 import py23_range | |
11 | 11 | |
12 | 12 | |
13 | 13 | def test_random(): |
1 | 1 | """\ |
2 | 2 | Test the fake fastnumbers module. |
3 | 3 | """ |
4 | from natsort.fake_fastnumbers import fast_float, fast_int, isreal | |
4 | from __future__ import unicode_literals | |
5 | ||
6 | import pytest | |
7 | import unicodedata | |
8 | from math import isnan | |
9 | from natsort.compat.py23 import py23_str | |
10 | from natsort.compat.fake_fastnumbers import ( | |
11 | fast_float, | |
12 | fast_int, | |
13 | isfloat, | |
14 | isint, | |
15 | ) | |
16 | from compat.hypothesis import ( | |
17 | assume, | |
18 | given, | |
19 | use_hypothesis, | |
20 | ) | |
5 | 21 | |
6 | 22 | |
7 | def test_fast_float_converts_float_string_to_float(): | |
23 | def is_float(x): | |
24 | try: | |
25 | float(x) | |
26 | except ValueError: | |
27 | try: | |
28 | unicodedata.numeric(x) | |
29 | except (ValueError, TypeError): | |
30 | return False | |
31 | else: | |
32 | return True | |
33 | else: | |
34 | return True | |
35 | ||
36 | ||
37 | def is_int(x): | |
38 | try: | |
39 | int(x) | |
40 | except ValueError: | |
41 | try: | |
42 | unicodedata.digit(x) | |
43 | except (ValueError, TypeError): | |
44 | return False | |
45 | else: | |
46 | return True | |
47 | else: | |
48 | return True | |
49 | ||
50 | # Each test has an "example" version for demonstrative purposes, | |
51 | # and a test that uses the hypothesis module. | |
52 | ||
53 | ||
54 | def test_fast_float_converts_float_string_to_float_example(): | |
8 | 55 | assert fast_float('45.8') == 45.8 |
9 | 56 | assert fast_float('-45') == -45.0 |
10 | 57 | assert fast_float('45.8e-2') == 45.8e-2 |
58 | assert isnan(fast_float('nan')) | |
11 | 59 | |
12 | 60 | |
13 | def test_fast_float_leaves_string_as_is(): | |
61 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
62 | @given(float) | |
63 | def test_fast_float_converts_float_string_to_float(x): | |
64 | assume(not isnan(x)) # But inf is included | |
65 | assert fast_float(repr(x)) == x | |
66 | ||
67 | ||
68 | def test_fast_float_leaves_string_as_is_example(): | |
14 | 69 | assert fast_float('invalid') == 'invalid' |
15 | 70 | |
16 | 71 | |
17 | def test_fast_int_leaves_float_string_as_is(): | |
18 | assert fast_int('45.8') == '45.8' | |
72 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
73 | @given(py23_str) | |
74 | def test_fast_float_leaves_string_as_is(x): | |
75 | assume(not is_float(x)) | |
76 | assert fast_float(x) == x | |
19 | 77 | |
20 | 78 | |
21 | def test_fast_int_converts_int_string_to_int(): | |
79 | def test_fast_int_leaves_float_string_as_is_example(): | |
80 | assert fast_int('45.8') == '45.8' | |
81 | assert fast_int('nan') == 'nan' | |
82 | assert fast_int('inf') == 'inf' | |
83 | ||
84 | ||
85 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
86 | @given(float) | |
87 | def test_fast_int_leaves_float_string_as_is(x): | |
88 | assume(not x.is_integer()) | |
89 | assert fast_int(repr(x)) == repr(x) | |
90 | ||
91 | ||
92 | def test_fast_int_converts_int_string_to_int_example(): | |
22 | 93 | assert fast_int('-45') == -45 |
23 | 94 | assert fast_int('+45') == 45 |
24 | 95 | |
25 | 96 | |
26 | def test_fast_int_leaves_string_as_is(): | |
97 | @given(int) | |
98 | def test_fast_int_converts_int_string_to_int(x): | |
99 | assert fast_int(repr(x)) == x | |
100 | ||
101 | ||
102 | def test_fast_int_leaves_string_as_is_example(): | |
27 | 103 | assert fast_int('invalid') == 'invalid' |
28 | 104 | |
29 | 105 | |
30 | def test_isreal_returns_True_for_real_numbers_False_for_strings(): | |
31 | assert isreal(-45) | |
32 | assert isreal(45.8e-2) | |
33 | assert not isreal('45.8') | |
34 | assert not isreal('invalid') | |
106 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
107 | @given(py23_str) | |
108 | def test_fast_int_leaves_string_as_is(x): | |
109 | assume(not is_int(x)) | |
110 | assert fast_int(x) == x | |
111 | ||
112 | ||
113 | def test_isfloat_returns_True_for_real_numbers_example(): | |
114 | assert isfloat(-45.0) | |
115 | assert isfloat(45.8e-2) | |
116 | ||
117 | ||
118 | @given(float) | |
119 | def test_isfloat_returns_True_for_real_numbers(x): | |
120 | assert isfloat(x) | |
121 | ||
122 | ||
123 | def test_isfloat_returns_False_for_strings_example(): | |
124 | assert not isfloat('45.8') | |
125 | assert not isfloat('invalid') | |
126 | ||
127 | ||
128 | @given(py23_str) | |
129 | def test_isfloat_returns_False_for_strings(x): | |
130 | assert not isfloat(x) | |
131 | ||
132 | ||
133 | def test_isint_returns_True_for_real_numbers_example(): | |
134 | assert isint(-45) | |
135 | assert isint(45) | |
136 | ||
137 | ||
138 | @given(int) | |
139 | def test_isint_returns_True_for_real_numbers(x): | |
140 | assert isint(x) | |
141 | ||
142 | ||
143 | def test_isint_returns_False_for_strings_example(): | |
144 | assert not isint('45') | |
145 | assert not isint('invalid') | |
146 | ||
147 | ||
148 | @given(py23_str) | |
149 | def test_isint_returns_False_for_strings(x): | |
150 | assert not isint(x) |
1 | 1 | """\ |
2 | 2 | Test the locale help module. |
3 | 3 | """ |
4 | from __future__ import unicode_literals | |
5 | ||
4 | 6 | import locale |
5 | from natsort.fake_fastnumbers import fast_float | |
6 | from natsort.locale_help import grouper, locale_convert, use_pyicu | |
7 | ||
8 | if use_pyicu: | |
9 | from natsort.locale_help import get_pyicu_transform | |
10 | from locale import getlocale | |
11 | strxfrm = get_pyicu_transform(getlocale()) | |
12 | else: | |
13 | from natsort.locale_help import strxfrm | |
7 | import pytest | |
8 | from math import isnan | |
9 | from itertools import chain | |
10 | from natsort.compat.fake_fastnumbers import fast_float, isfloat | |
11 | from natsort.locale_help import grouper, locale_convert | |
12 | from natsort.compat.py23 import py23_str | |
13 | from natsort.compat.locale import use_pyicu | |
14 | from compat.locale import ( | |
15 | load_locale, | |
16 | has_locale_de_DE, | |
17 | get_strxfrm, | |
18 | low, | |
19 | ) | |
20 | from compat.hypothesis import ( | |
21 | assume, | |
22 | given, | |
23 | use_hypothesis, | |
24 | ) | |
14 | 25 | |
15 | 26 | |
16 | def test_grouper_returns_letters_with_lowercase_transform_of_letter(): | |
17 | assert grouper('HELLO', fast_float) == 'hHeElLlLoO' | |
18 | assert grouper('hello', fast_float) == 'hheelllloo' | |
27 | # Each test has an "example" version for demonstrative purposes, | |
28 | # and a test that uses the hypothesis module. | |
19 | 29 | |
20 | 30 | |
21 | def test_grouper_returns_float_string_as_float(): | |
22 | assert grouper('45.8e-2', fast_float) == 45.8e-2 | |
31 | def test_grouper_returns_letters_with_lowercase_transform_of_letter_example(): | |
32 | assert grouper('HELLO', (fast_float, isfloat)) == 'hHeElLlLoO' | |
33 | assert grouper('hello', (fast_float, isfloat)) == 'hheelllloo' | |
23 | 34 | |
24 | 35 | |
25 | def test_locale_convert_transforms_float_string_to_float(): | |
26 | locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8') | |
27 | assert locale_convert('45.8', fast_float, False) == 45.8 | |
36 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
37 | @given(py23_str) | |
38 | def test_grouper_returns_letters_with_lowercase_transform_of_letter(x): | |
39 | assume(type(fast_float(x)) is not float) | |
40 | assert grouper(x, (fast_float, isfloat)) == ''.join(chain.from_iterable([low(y), y] for y in x)) | |
41 | ||
42 | ||
43 | def test_grouper_returns_float_string_as_float_example(): | |
44 | assert grouper('45.8e-2', (fast_float, isfloat)) == 45.8e-2 | |
45 | ||
46 | ||
47 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
48 | @given(float) | |
49 | def test_grouper_returns_float_string_as_float(x): | |
50 | assume(not isnan(x)) | |
51 | assert grouper(repr(x), (fast_float, isfloat)) == x | |
52 | ||
53 | ||
54 | def test_locale_convert_transforms_float_string_to_float_example(): | |
55 | load_locale('en_US') | |
56 | assert locale_convert('45.8', (fast_float, isfloat), False) == 45.8 | |
28 | 57 | locale.setlocale(locale.LC_NUMERIC, str('')) |
29 | 58 | |
30 | 59 | |
31 | def test_locale_convert_transforms_nonfloat_string_to_strxfrm_string(): | |
32 | locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8') | |
33 | if use_pyicu: | |
34 | from natsort.locale_help import get_pyicu_transform | |
35 | from locale import getlocale | |
36 | strxfrm = get_pyicu_transform(getlocale()) | |
37 | else: | |
38 | from natsort.locale_help import strxfrm | |
39 | assert locale_convert('45,8', fast_float, False) == strxfrm('45,8') | |
40 | assert locale_convert('hello', fast_float, False) == strxfrm('hello') | |
60 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
61 | @given(float) | |
62 | def test_locale_convert_transforms_float_string_to_float(x): | |
63 | assume(not isnan(x)) | |
64 | load_locale('en_US') | |
65 | assert locale_convert(repr(x), (fast_float, isfloat), False) == x | |
41 | 66 | locale.setlocale(locale.LC_NUMERIC, str('')) |
42 | 67 | |
43 | 68 | |
44 | def test_locale_convert_with_groupletters_transforms_nonfloat_string_to_strxfrm_string_with_grouped_letters(): | |
45 | locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8') | |
46 | if use_pyicu: | |
47 | from natsort.locale_help import get_pyicu_transform | |
48 | from locale import getlocale | |
49 | strxfrm = get_pyicu_transform(getlocale()) | |
50 | else: | |
51 | from natsort.locale_help import strxfrm | |
52 | assert locale_convert('hello', fast_float, True) == strxfrm('hheelllloo') | |
53 | assert locale_convert('45,8', fast_float, True) == strxfrm('4455,,88') | |
69 | def test_locale_convert_transforms_nonfloat_string_to_strxfrm_string_example(): | |
70 | load_locale('en_US') | |
71 | strxfrm = get_strxfrm() | |
72 | assert locale_convert('45,8', (fast_float, isfloat), False) == strxfrm('45,8') | |
73 | assert locale_convert('hello', (fast_float, isfloat), False) == strxfrm('hello') | |
54 | 74 | locale.setlocale(locale.LC_NUMERIC, str('')) |
55 | 75 | |
56 | 76 | |
57 | def test_locale_convert_transforms_float_string_to_float_with_de_locale(): | |
58 | locale.setlocale(locale.LC_NUMERIC, 'de_DE.UTF-8') | |
59 | assert locale_convert('45.8', fast_float, False) == 45.8 | |
60 | assert locale_convert('45,8', fast_float, False) == 45.8 | |
77 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
78 | @given(py23_str) | |
79 | def test_locale_convert_transforms_nonfloat_string_to_strxfrm_string(x): | |
80 | assume(type(fast_float(x)) is not float) | |
81 | load_locale('en_US') | |
82 | strxfrm = get_strxfrm() | |
83 | assert locale_convert(x, (fast_float, isfloat), False) == strxfrm(x) | |
61 | 84 | locale.setlocale(locale.LC_NUMERIC, str('')) |
85 | ||
86 | ||
87 | def test_locale_convert_with_groupletters_transforms_nonfloat_string_to_strxfrm_string_with_grouped_letters_example(): | |
88 | load_locale('en_US') | |
89 | strxfrm = get_strxfrm() | |
90 | assert locale_convert('hello', (fast_float, isfloat), True) == strxfrm('hheelllloo') | |
91 | assert locale_convert('45,8', (fast_float, isfloat), True) == strxfrm('4455,,88') | |
92 | locale.setlocale(locale.LC_NUMERIC, str('')) | |
93 | ||
94 | ||
95 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
96 | @given(py23_str) | |
97 | def test_locale_convert_with_groupletters_transforms_nonfloat_string_to_strxfrm_string_with_grouped_letters(x): | |
98 | assume(type(fast_float(x)) is not float) | |
99 | load_locale('en_US') | |
100 | strxfrm = get_strxfrm() | |
101 | assert locale_convert(x, (fast_float, isfloat), True) == strxfrm(''.join(chain.from_iterable([low(y), y] for y in x))) | |
102 | locale.setlocale(locale.LC_NUMERIC, str('')) | |
103 | ||
104 | ||
105 | @pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale') | |
106 | def test_locale_convert_transforms_float_string_to_float_with_de_locale_example(): | |
107 | load_locale('de_DE') | |
108 | assert locale_convert('45.8', (fast_float, isfloat), False) == 45.8 | |
109 | assert locale_convert('45,8', (fast_float, isfloat), False) == 45.8 | |
110 | locale.setlocale(locale.LC_NUMERIC, str('')) | |
111 | ||
112 | ||
113 | @pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale') | |
114 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
115 | @given(float) | |
116 | def test_locale_convert_transforms_float_string_to_float_with_de_locale(x): | |
117 | assume(not isnan(x)) | |
118 | load_locale('de_DE') | |
119 | assert locale_convert(repr(x), (fast_float, isfloat), False) == x | |
120 | assert locale_convert(repr(x).replace('.', ','), (fast_float, isfloat), False) == x | |
121 | locale.setlocale(locale.LC_NUMERIC, str('')) |
1 | 1 | """\ |
2 | 2 | Test the natsort command-line tool functions. |
3 | 3 | """ |
4 | from __future__ import print_function | |
4 | from __future__ import print_function, unicode_literals | |
5 | import pytest | |
5 | 6 | import re |
6 | 7 | import sys |
7 | 8 | from pytest import raises |
8 | try: | |
9 | from unittest.mock import patch, call | |
10 | except ImportError: | |
11 | from mock import patch, call | |
12 | from natsort.__main__ import main, range_check, check_filter | |
13 | from natsort.__main__ import keep_entry_range, exclude_entry | |
14 | from natsort.__main__ import sort_and_print_entries | |
9 | from compat.mock import patch, call | |
10 | from compat.hypothesis import ( | |
11 | assume, | |
12 | given, | |
13 | integers_from, | |
14 | integers_in_range, | |
15 | sampled_from, | |
16 | use_hypothesis, | |
17 | ) | |
18 | from natsort.__main__ import ( | |
19 | main, | |
20 | range_check, | |
21 | check_filter, | |
22 | keep_entry_range, | |
23 | exclude_entry, | |
24 | sort_and_print_entries, | |
25 | py23_str, | |
26 | ) | |
15 | 27 | |
16 | 28 | |
17 | 29 | def test_main_passes_default_arguments_with_no_command_line_options(): |
24 | 36 | assert args.reverse_filter is None |
25 | 37 | assert args.exclude is None |
26 | 38 | assert not args.reverse |
27 | assert args.number_type == 'float' | |
28 | assert args.signed | |
39 | assert args.number_type == 'int' | |
40 | assert not args.signed | |
29 | 41 | assert args.exp |
30 | 42 | assert not args.locale |
31 | 43 | |
35 | 47 | sys.argv[1:] = ['--paths', '--reverse', '--locale', |
36 | 48 | '--filter', '4', '10', |
37 | 49 | '--reverse-filter', '100', '110', |
38 | '--number-type', 'int', | |
39 | '--nosign', '--noexp', | |
50 | '--number-type', 'float', '--noexp', '--sign', | |
40 | 51 | '--exclude', '34', '--exclude', '35', |
41 | 52 | 'num-2', 'num-6', 'num-1'] |
42 | 53 | main() |
46 | 57 | assert args.reverse_filter == [(100.0, 110.0)] |
47 | 58 | assert args.exclude == [34, 35] |
48 | 59 | assert args.reverse |
49 | assert args.number_type == 'int' | |
50 | assert not args.signed | |
60 | assert args.number_type == 'float' | |
61 | assert args.signed | |
51 | 62 | assert not args.exp |
52 | 63 | assert args.locale |
53 | ||
54 | ||
55 | def test_range_check_returns_range_as_is_but_with_floats(): | |
56 | assert range_check(10, 11) == (10.0, 11.0) | |
57 | assert range_check(6.4, 30) == (6.4, 30.0) | |
58 | ||
59 | ||
60 | def test_range_check_raises_ValueError_if_range_is_invalid(): | |
61 | with raises(ValueError) as err: | |
62 | range_check(7, 2) | |
63 | assert str(err.value) == 'low >= high' | |
64 | ||
65 | ||
66 | def test_check_filter_returns_None_if_filter_evaluates_to_False(): | |
67 | assert check_filter(()) is None | |
68 | assert check_filter(False) is None | |
69 | assert check_filter(None) is None | |
70 | ||
71 | ||
72 | def test_check_filter_converts_filter_numbers_to_floats_if_filter_is_valid(): | |
73 | assert check_filter([(6, 7)]) == [(6.0, 7.0)] | |
74 | assert check_filter([(6, 7), (2, 8)]) == [(6.0, 7.0), (2.0, 8.0)] | |
75 | ||
76 | ||
77 | def test_check_filter_raises_ValueError_if_filter_is_invalid(): | |
78 | with raises(ValueError) as err: | |
79 | check_filter([(7, 2)]) | |
80 | assert str(err.value) == 'Error in --filter: low >= high' | |
81 | ||
82 | ||
83 | def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_the_range_bounds(): | |
84 | assert keep_entry_range('a56b23c89', [0], [100], int, re.compile(r'\d+')) | |
85 | ||
86 | ||
87 | def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_any_range_bounds(): | |
88 | assert keep_entry_range('a56b23c89', [1, 88], [20, 90], int, re.compile(r'\d+')) | |
89 | ||
90 | ||
91 | def test_keep_entry_range_returns_False_if_no_portion_of_input_is_between_the_range_bounds(): | |
92 | assert not keep_entry_range('a56b23c89', [1], [20], int, re.compile(r'\d+')) | |
93 | ||
94 | ||
95 | def test_exclude_entry_returns_True_if_exlcude_parameters_are_not_in_input(): | |
96 | assert exclude_entry('a56b23c89', [100, 45], int, re.compile(r'\d+')) | |
97 | ||
98 | ||
99 | def test_exclude_entry_returns_False_if_exlcude_parameters_are_in_input(): | |
100 | assert not exclude_entry('a56b23c89', [23], int, re.compile(r'\d+')) | |
101 | 64 | |
102 | 65 | |
103 | 66 | class Args: |
197 | 160 | sort_and_print_entries(entries, Args(None, None, False, True, True)) |
198 | 161 | e = [call(entries[i]) for i in reversed([2, 3, 1, 0, 5, 6, 4])] |
199 | 162 | p.assert_has_calls(e) |
163 | ||
164 | ||
165 | # Each test has an "example" version for demonstrative purposes, | |
166 | # and a test that uses the hypothesis module. | |
167 | ||
168 | def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second_example(): | |
169 | assert range_check(10, 11) == (10.0, 11.0) | |
170 | assert range_check(6.4, 30) == (6.4, 30.0) | |
171 | ||
172 | ||
173 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
174 | @given(x=int, y=int) | |
175 | def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second(x, y): | |
176 | assume(x < y) | |
177 | assert range_check(x, y) == (float(x), float(y)) | |
178 | ||
179 | ||
180 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
181 | @given(x=float, y=float) | |
182 | def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second2(x, y): | |
183 | assume(x < y) | |
184 | assert range_check(x, y) == (x, y) | |
185 | ||
186 | ||
187 | def test_range_check_raises_ValueError_if_second_is_less_than_first_example(): | |
188 | with raises(ValueError) as err: | |
189 | range_check(7, 2) | |
190 | assert str(err.value) == 'low >= high' | |
191 | ||
192 | ||
193 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
194 | @given(x=float, y=float) | |
195 | def test_range_check_raises_ValueError_if_second_is_less_than_first(x, y): | |
196 | assume(x >= y) | |
197 | with raises(ValueError) as err: | |
198 | range_check(x, x) | |
199 | assert str(err.value) == 'low >= high' | |
200 | ||
201 | ||
202 | def test_check_filter_returns_None_if_filter_evaluates_to_False(): | |
203 | assert check_filter(()) is None | |
204 | assert check_filter(False) is None | |
205 | assert check_filter(None) is None | |
206 | ||
207 | ||
208 | def test_check_filter_converts_filter_numbers_to_floats_if_filter_is_valid_example(): | |
209 | assert check_filter([(6, 7)]) == [(6.0, 7.0)] | |
210 | assert check_filter([(6, 7), (2, 8)]) == [(6.0, 7.0), (2.0, 8.0)] | |
211 | ||
212 | ||
213 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
214 | @given(x=(int, int, float, float), y=(int, float, float, int)) | |
215 | def test_check_filter_converts_filter_numbers_to_floats_if_filter_is_valid(x, y): | |
216 | assume(all(i < j for i, j in zip(x, y))) | |
217 | assert check_filter(list(zip(x, y))) == [(float(i), float(j)) for i, j in zip(x, y)] | |
218 | ||
219 | ||
220 | def test_check_filter_raises_ValueError_if_filter_is_invalid_example(): | |
221 | with raises(ValueError) as err: | |
222 | check_filter([(7, 2)]) | |
223 | assert str(err.value) == 'Error in --filter: low >= high' | |
224 | ||
225 | ||
226 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
227 | @given(x=(int, int, float, float), y=(int, float, float, int)) | |
228 | def test_check_filter_raises_ValueError_if_filter_is_invalid(x, y): | |
229 | assume(any(i >= j for i, j in zip(x, y))) | |
230 | with raises(ValueError) as err: | |
231 | check_filter(list(zip(x, y))) | |
232 | assert str(err.value) == 'Error in --filter: low >= high' | |
233 | ||
234 | ||
235 | def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_the_range_bounds_example(): | |
236 | assert keep_entry_range('a56b23c89', [0], [100], int, re.compile(r'\d+')) | |
237 | ||
238 | ||
239 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
240 | @given((py23_str, integers_in_range(1, 99), py23_str, integers_in_range(1, 99), py23_str)) | |
241 | def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_the_range_bounds(x): | |
242 | s = ''.join(map(py23_str, x)) | |
243 | assume(any(0 < int(i) < 100 for i in re.findall(r'\d+', s) if re.match(r'\d+$', i))) | |
244 | assert keep_entry_range(s, [0], [100], int, re.compile(r'\d+')) | |
245 | ||
246 | ||
247 | def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_any_range_bounds_example(): | |
248 | assert keep_entry_range('a56b23c89', [1, 88], [20, 90], int, re.compile(r'\d+')) | |
249 | ||
250 | ||
251 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
252 | @given((py23_str, integers_in_range(2, 89), py23_str, integers_in_range(2, 89), py23_str)) | |
253 | def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_any_range_bounds(x): | |
254 | s = ''.join(map(py23_str, x)) | |
255 | assume(any((1 < int(i) < 20) or (88 < int(i) < 90) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i))) | |
256 | assert keep_entry_range(s, [1, 88], [20, 90], int, re.compile(r'\d+')) | |
257 | ||
258 | ||
259 | def test_keep_entry_range_returns_False_if_no_portion_of_input_is_between_the_range_bounds_example(): | |
260 | assert not keep_entry_range('a56b23c89', [1], [20], int, re.compile(r'\d+')) | |
261 | ||
262 | ||
263 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
264 | @given((py23_str, integers_from(21), py23_str, integers_from(21), py23_str)) | |
265 | def test_keep_entry_range_returns_False_if_no_portion_of_input_is_between_the_range_bounds(x): | |
266 | s = ''.join(map(py23_str, x)) | |
267 | assume(all(not (1 <= int(i) <= 20) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i))) | |
268 | assert not keep_entry_range(s, [1], [20], int, re.compile(r'\d+')) | |
269 | ||
270 | ||
271 | def test_exclude_entry_returns_True_if_exlcude_parameters_are_not_in_input_example(): | |
272 | assert exclude_entry('a56b23c89', [100, 45], int, re.compile(r'\d+')) | |
273 | ||
274 | ||
275 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
276 | @given((py23_str, integers_from(0), py23_str, integers_from(0), py23_str)) | |
277 | def test_exclude_entry_returns_True_if_exlcude_parameters_are_not_in_input(x): | |
278 | s = ''.join(map(py23_str, x)) | |
279 | assume(not any(int(i) in (23, 45, 87) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i))) | |
280 | assert exclude_entry(s, [23, 45, 87], int, re.compile(r'\d+')) | |
281 | ||
282 | ||
283 | def test_exclude_entry_returns_False_if_exlcude_parameters_are_in_input_example(): | |
284 | assert not exclude_entry('a56b23c89', [23], int, re.compile(r'\d+')) | |
285 | ||
286 | ||
287 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
288 | @given((py23_str, sampled_from([23, 45, 87]), py23_str, sampled_from([23, 45, 87]), py23_str)) | |
289 | def test_exclude_entry_returns_False_if_exlcude_parameters_are_in_input(x): | |
290 | s = ''.join(map(py23_str, x)) | |
291 | assume(any(int(i) in (23, 45, 87) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i))) | |
292 | assert not exclude_entry(s, [23, 45, 87], int, re.compile(r'\d+')) |
3 | 3 | See the README or the natsort homepage for more details. |
4 | 4 | """ |
5 | 5 | from __future__ import unicode_literals, print_function |
6 | import pytest | |
6 | 7 | import sys |
7 | 8 | import warnings |
8 | 9 | import locale |
9 | 10 | from operator import itemgetter |
10 | 11 | from pytest import raises |
11 | from natsort import natsorted, index_natsorted, natsort_key, versorted, index_versorted | |
12 | from natsort import humansorted, index_humansorted, natsort_keygen, order_by_index, ns | |
13 | from natsort import realsorted, index_realsorted, decoder, as_ascii, as_utf8 | |
12 | from natsort import ( | |
13 | natsorted, | |
14 | index_natsorted, | |
15 | natsort_key, | |
16 | versorted, | |
17 | index_versorted, | |
18 | humansorted, | |
19 | index_humansorted, | |
20 | natsort_keygen, | |
21 | order_by_index, | |
22 | ns, | |
23 | realsorted, | |
24 | index_realsorted, | |
25 | decoder, | |
26 | as_ascii, | |
27 | as_utf8, | |
28 | ) | |
29 | from compat.locale import load_locale, has_locale_de_DE | |
14 | 30 | from natsort.utils import _natsort_key |
15 | 31 | |
16 | 32 | |
40 | 56 | # But it raises a deprecation warning |
41 | 57 | with warnings.catch_warnings(record=True) as w: |
42 | 58 | warnings.simplefilter("always") |
43 | assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2', key=None, alg=ns.F) | |
59 | assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2', key=None, alg=ns.I) | |
44 | 60 | assert len(w) == 1 |
45 | 61 | assert "natsort_key is deprecated as of 3.4.0, please use natsort_keygen" in str(w[-1].message) |
46 | 62 | # It is called for each element in a list when sorting |
53 | 69 | |
54 | 70 | def test_natsort_keygen_returns_natsort_key_with_alg_option(): |
55 | 71 | a = 'a-5.034e1' |
56 | assert natsort_keygen()(a) == _natsort_key(a, None, ns.F) | |
57 | assert natsort_keygen(alg=ns.I | ns.U)(a) == _natsort_key(a, None, ns.I | ns.U) | |
72 | assert natsort_keygen()(a) == _natsort_key(a, None, ns.I) | |
73 | assert natsort_keygen(alg=ns.F | ns.S)(a) == _natsort_key(a, None, ns.F | ns.S) | |
58 | 74 | |
59 | 75 | |
60 | 76 | def test_natsort_keygen_with_key_returns_same_result_as_nested_lambda_with_bare_natsort_key(): |
62 | 78 | f1 = natsort_keygen(key=lambda x: x.upper()) |
63 | 79 | |
64 | 80 | def f2(x): |
65 | return _natsort_key(x, lambda y: y.upper(), ns.F) | |
81 | return _natsort_key(x, lambda y: y.upper(), ns.I) | |
66 | 82 | assert f1(a) == f2(a) |
67 | 83 | |
68 | 84 | |
69 | 85 | def test_natsort_keygen_returns_key_that_can_be_used_to_sort_list_in_place_with_same_result_as_natsorted(): |
70 | 86 | a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] |
71 | 87 | b = a[:] |
72 | a.sort(key=natsort_keygen(alg=ns.I)) | |
73 | assert a == natsorted(b, alg=ns.I) | |
88 | a.sort(key=natsort_keygen(alg=ns.F)) | |
89 | assert a == natsorted(b, alg=ns.F) | |
74 | 90 | |
75 | 91 | |
76 | 92 | def test_natsorted_returns_strings_with_numbers_in_ascending_order(): |
79 | 95 | |
80 | 96 | |
81 | 97 | def test_natsorted_returns_list_of_numbers_sorted_as_signed_floats_with_exponents(): |
82 | a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] | |
83 | assert natsorted(a) == ['a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.'] | |
84 | ||
85 | ||
86 | def test_natsorted_returns_list_of_numbers_sorted_as_signed_floats_without_exponents_with_NOEXP_option(): | |
87 | a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] | |
88 | assert natsorted(a, alg=ns.NOEXP | ns.FLOAT) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.'] | |
89 | ||
90 | ||
91 | def test_natsorted_returns_list_of_numbers_sorted_as_signed_ints_with_INT_option(): | |
92 | a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] | |
93 | assert natsorted(a, alg=ns.INT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] | |
94 | ||
95 | ||
96 | def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_DIGIT_option(): | |
97 | a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] | |
98 | assert natsorted(a, alg=ns.DIGIT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] | |
99 | ||
100 | ||
101 | def test_natsorted_returns_list_of_numbers_sorted_without_accounting_for_sign_with_UNSIGNED_option(): | |
98 | a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] | |
99 | assert natsorted(a, alg=ns.REAL) == ['a-50', 'a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.'] | |
100 | ||
101 | ||
102 | def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_floats_without_exponents_with_NOEXP_option(): | |
103 | a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] | |
104 | assert natsorted(a, alg=ns.N | ns.F | ns.U) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.', 'a-50'] | |
105 | # UNSIGNED is default | |
106 | assert natsorted(a, alg=ns.NOEXP | ns.FLOAT) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.', 'a-50'] | |
107 | ||
108 | ||
109 | def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_INT_option(): | |
110 | a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] | |
111 | assert natsorted(a, alg=ns.INT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50'] | |
112 | # INT is default | |
113 | assert natsorted(a) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50'] | |
114 | ||
115 | ||
116 | def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_DIGIT_and_VERSION_option(): | |
117 | a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] | |
118 | assert natsorted(a, alg=ns.DIGIT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50'] | |
119 | assert natsorted(a, alg=ns.VERSION) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50'] | |
120 | ||
121 | ||
122 | def test_natsorted_returns_list_of_numbers_sorted_as_signed_ints_with_SIGNED_option(): | |
123 | a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] | |
124 | assert natsorted(a, alg=ns.SIGNED) == ['a-50', 'a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] | |
125 | ||
126 | ||
127 | def test_natsorted_returns_list_of_numbers_sorted_accounting_for_sign_with_SIGNED_option(): | |
102 | 128 | a = ['a-5', 'a7', 'a+2'] |
103 | assert natsorted(a, alg=ns.UNSIGNED) == ['a7', 'a+2', 'a-5'] | |
104 | ||
105 | ||
106 | def test_natsorted_returns_list_of_numbers_sorted_accounting_for_sign_without_UNSIGNED_option(): | |
129 | assert natsorted(a, alg=ns.SIGNED) == ['a-5', 'a+2', 'a7'] | |
130 | ||
131 | ||
132 | def test_natsorted_returns_list_of_numbers_sorted_not_accounting_for_sign_without_SIGNED_option(): | |
107 | 133 | a = ['a-5', 'a7', 'a+2'] |
108 | assert natsorted(a) == ['a-5', 'a+2', 'a7'] | |
109 | ||
110 | ||
111 | def test_natsorted_returns_list_of_version_numbers_improperly_sorted_without_VERSION_option(): | |
134 | assert natsorted(a) == ['a7', 'a+2', 'a-5'] | |
135 | ||
136 | ||
137 | def test_natsorted_returns_sorted_list_of_version_numbers_by_default_or_with_VERSION_option(): | |
112 | 138 | a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] |
113 | assert natsorted(a) == ['1.10.1', '1.11', '1.11.4', '1.9.9a', '1.9.9b'] | |
114 | ||
115 | ||
116 | def test_natsorted_returns_sorted_list_of_version_numbers_with_VERSION_option(): | |
117 | a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] | |
139 | assert natsorted(a) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4'] | |
118 | 140 | assert natsorted(a, alg=ns.VERSION) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4'] |
119 | 141 | |
120 | 142 | |
130 | 152 | def test_natsorted_with_mixed_input_returns_sorted_results_without_error(): |
131 | 153 | a = ['2', 'ä', 'b', 1.5, 3] |
132 | 154 | assert natsorted(a) == [1.5, '2', 3, 'b', 'ä'] |
155 | ||
156 | ||
157 | def test_natsorted_with_nan_input_returns_sorted_results_with_nan_last_with_NANLAST(): | |
158 | a = ['25', 5, float('nan'), 1E40] | |
159 | # The slice is because NaN != NaN | |
160 | assert natsorted(a, alg=ns.NANLAST)[:3] == [5, '25', 1E40, float('nan')][:3] | |
161 | ||
162 | ||
163 | def test_natsorted_with_nan_input_returns_sorted_results_with_nan_first_without_NANLAST(): | |
164 | a = ['25', 5, float('nan'), 1E40] | |
165 | # The slice is because NaN != NaN | |
166 | assert natsorted(a)[1:] == [float('nan'), 5, '25', 1E40][1:] | |
133 | 167 | |
134 | 168 | |
135 | 169 | def test_natsorted_with_mixed_input_raises_TypeError_if_bytes_type_is_involved_on_Python3(): |
232 | 266 | |
233 | 267 | def test_natsorted_with_LOCALE_returns_results_sorted_by_lowercase_first_and_grouped_letters(): |
234 | 268 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] |
235 | locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) | |
269 | load_locale('en_US') | |
236 | 270 | assert natsorted(a, alg=ns.LOCALE) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] |
237 | 271 | locale.setlocale(locale.LC_ALL, str('')) |
238 | 272 | |
239 | 273 | |
274 | def test_natsorted_with_LOCALE_and_CAPITALFIRST_returns_results_sorted_by_capital_first_and_ungrouped(): | |
275 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
276 | load_locale('en_US') | |
277 | assert natsorted(a, alg=ns.LOCALE | ns.CAPITALFIRST) == ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn'] | |
278 | locale.setlocale(locale.LC_ALL, str('')) | |
279 | ||
280 | ||
281 | def test_natsorted_with_LOCALE_and_LOWERCASEFIRST_returns_results_sorted_by_uppercase_first_and_grouped_letters(): | |
282 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
283 | load_locale('en_US') | |
284 | assert natsorted(a, alg=ns.LOCALE | ns.LOWERCASEFIRST) == ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn'] | |
285 | locale.setlocale(locale.LC_ALL, str('')) | |
286 | ||
287 | ||
288 | def test_natsorted_with_LOCALE_and_CAPITALFIRST_and_LOWERCASE_returns_results_sorted_by_capital_last_and_ungrouped(): | |
289 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
290 | load_locale('en_US') | |
291 | assert natsorted(a, alg=ns.LOCALE | ns.CAPITALFIRST | ns.LOWERCASEFIRST) == ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn'] | |
292 | locale.setlocale(locale.LC_ALL, str('')) | |
293 | ||
294 | ||
240 | 295 | def test_natsorted_with_LOCALE_and_en_setting_returns_results_sorted_by_en_language(): |
241 | locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) | |
296 | load_locale('en_US') | |
242 | 297 | a = ['c', 'ä', 'b', 'a5,6', 'a5,50'] |
243 | assert natsorted(a, alg=ns.LOCALE) == ['a5,6', 'a5,50', 'ä', 'b', 'c'] | |
244 | locale.setlocale(locale.LC_ALL, str('')) | |
245 | ||
246 | ||
298 | assert natsorted(a, alg=ns.LOCALE | ns.F) == ['a5,6', 'a5,50', 'ä', 'b', 'c'] | |
299 | locale.setlocale(locale.LC_ALL, str('')) | |
300 | ||
301 | ||
302 | @pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale') | |
247 | 303 | def test_natsorted_with_LOCALE_and_de_setting_returns_results_sorted_by_de_language(): |
248 | locale.setlocale(locale.LC_ALL, str('de_DE.UTF-8')) | |
304 | load_locale('de_DE') | |
249 | 305 | a = ['c', 'ä', 'b', 'a5,6', 'a5,50'] |
250 | assert natsorted(a, alg=ns.LOCALE) == ['a5,50', 'a5,6', 'ä', 'b', 'c'] | |
306 | assert natsorted(a, alg=ns.LOCALE | ns.F) == ['a5,50', 'a5,6', 'ä', 'b', 'c'] | |
251 | 307 | locale.setlocale(locale.LC_ALL, str('')) |
252 | 308 | |
253 | 309 | |
254 | 310 | def test_natsorted_with_LOCALE_and_mixed_input_returns_sorted_results_without_error(): |
255 | locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) | |
311 | load_locale('en_US') | |
256 | 312 | a = ['0', 'Á', '2', 'Z'] |
257 | 313 | assert natsorted(a) == ['0', '2', 'Z', 'Á'] |
258 | 314 | a = ['2', 'ä', 'b', 1.5, 3] |
260 | 316 | locale.setlocale(locale.LC_ALL, str('')) |
261 | 317 | |
262 | 318 | |
263 | def test_versorted_returns_results_identical_to_natsorted_with_VERSION(): | |
319 | def test_versorted_returns_results_identical_to_natsorted(): | |
264 | 320 | a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] |
265 | assert versorted(a) == natsorted(a, alg=ns.VERSION) | |
321 | # versorted is retained for backwards compatibility | |
322 | assert versorted(a) == natsorted(a) | |
323 | ||
324 | ||
325 | def test_realsorted_returns_results_identical_to_natsorted_with_REAL(): | |
326 | a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] | |
327 | assert realsorted(a) == natsorted(a, alg=ns.REAL) | |
266 | 328 | |
267 | 329 | |
268 | 330 | def test_humansorted_returns_results_identical_to_natsorted_with_LOCALE(): |
269 | 331 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] |
270 | 332 | assert humansorted(a) == natsorted(a, alg=ns.LOCALE) |
271 | ||
272 | ||
273 | def test_realsorted_returns_results_identical_to_natsorted(): | |
274 | a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] | |
275 | assert realsorted(a) == natsorted(a) | |
276 | 333 | |
277 | 334 | |
278 | 335 | def test_index_natsorted_returns_integer_list_of_sort_order_for_input_list(): |
311 | 368 | assert index_natsorted(a, alg=ns.PATH) == [1, 2, 0] |
312 | 369 | |
313 | 370 | |
314 | def test_index_versorted_returns_results_identical_to_index_natsorted_with_VERSION(): | |
371 | def test_index_versorted_returns_results_identical_to_index_natsorted(): | |
315 | 372 | a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] |
316 | assert index_versorted(a) == index_natsorted(a, alg=ns.VERSION) | |
373 | # index_versorted is retained for backwards compatibility | |
374 | assert index_versorted(a) == index_natsorted(a) | |
375 | ||
376 | ||
377 | def test_index_realsorted_returns_results_identical_to_index_natsorted_with_REAL(): | |
378 | a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] | |
379 | assert index_realsorted(a) == index_natsorted(a, alg=ns.REAL) | |
317 | 380 | |
318 | 381 | |
319 | 382 | def test_index_humansorted_returns_results_identical_to_index_natsorted_with_LOCALE(): |
320 | 383 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] |
321 | 384 | assert index_humansorted(a) == index_natsorted(a, alg=ns.LOCALE) |
322 | ||
323 | ||
324 | def test_index_realsorted_returns_results_identical_to_index_natsorted(): | |
325 | a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] | |
326 | assert index_realsorted(a) == index_natsorted(a) | |
327 | 385 | |
328 | 386 | |
329 | 387 | def test_order_by_index_sorts_list_according_to_order_of_integer_list(): |
0 | # -*- coding: utf-8 -*- | |
1 | """\ | |
2 | Test the Unicode numbers module. | |
3 | """ | |
4 | from __future__ import unicode_literals | |
5 | import unicodedata | |
6 | from natsort.compat.py23 import py23_range, py23_unichr | |
7 | from natsort.unicode_numbers import ( | |
8 | numeric_chars, | |
9 | numeric, | |
10 | digit_chars, | |
11 | digits, | |
12 | ) | |
13 | ||
14 | ||
15 | def test_numeric_chars_contains_only_valid_unicode_numeric_characters(): | |
16 | for a in numeric_chars: | |
17 | assert unicodedata.numeric(a, None) is not None | |
18 | ||
19 | ||
20 | def test_digit_chars_contains_only_valid_unicode_digit_characters(): | |
21 | for a in digit_chars: | |
22 | assert unicodedata.digit(a, None) is not None | |
23 | ||
24 | ||
25 | def test_numeric_chars_contains_all_valid_unicode_numeric_characters(): | |
26 | for i in py23_range(0X10FFFF): | |
27 | try: | |
28 | a = py23_unichr(i) | |
29 | except ValueError: | |
30 | break | |
31 | if a in set('0123456789'): | |
32 | continue | |
33 | if unicodedata.numeric(a, None) is not None: | |
34 | assert a in numeric_chars | |
35 | ||
36 | ||
37 | def test_digit_chars_contains_all_valid_unicode_digit_characters(): | |
38 | for i in py23_range(0X10FFFF): | |
39 | try: | |
40 | a = py23_unichr(i) | |
41 | except ValueError: | |
42 | break | |
43 | if a in set('0123456789'): | |
44 | continue | |
45 | if unicodedata.digit(a, None) is not None: | |
46 | assert a in digit_chars | |
47 | ||
48 | ||
49 | def test_combined_string_contains_all_characters_in_list(): | |
50 | assert numeric == ''.join(numeric_chars) | |
51 | assert digits == ''.join(digit_chars) |
3 | 3 | |
4 | 4 | import sys |
5 | 5 | import locale |
6 | import pathlib | |
7 | import pytest | |
8 | import string | |
9 | from math import isnan | |
6 | 10 | from operator import itemgetter |
11 | from itertools import chain | |
7 | 12 | from pytest import raises |
8 | 13 | from natsort.ns_enum import ns |
9 | from natsort.utils import _number_extracter, _py3_safe, _natsort_key, _args_to_enum | |
10 | from natsort.utils import _float_sign_exp_re, _float_nosign_exp_re, _float_sign_noexp_re | |
11 | from natsort.utils import _float_nosign_noexp_re, _int_nosign_re, _int_sign_re, _do_decoding | |
12 | from natsort.locale_help import use_pyicu, null_string | |
13 | from natsort.py23compat import py23_str | |
14 | ||
15 | try: | |
16 | from fastnumbers import fast_float, fast_int | |
17 | except ImportError: | |
18 | from natsort.fake_fastnumbers import fast_float, fast_int | |
19 | ||
20 | try: | |
21 | import pathlib | |
22 | except ImportError: | |
23 | has_pathlib = False | |
24 | else: | |
25 | has_pathlib = True | |
14 | from natsort.utils import ( | |
15 | _number_extracter, | |
16 | _py3_safe, | |
17 | _natsort_key, | |
18 | _args_to_enum, | |
19 | _float_sign_exp_re, | |
20 | _float_nosign_exp_re, | |
21 | _float_sign_noexp_re, | |
22 | _float_nosign_noexp_re, | |
23 | _int_nosign_re, | |
24 | _int_sign_re, | |
25 | _do_decoding, | |
26 | _path_splitter, | |
27 | _fix_nan, | |
28 | ) | |
29 | from natsort.locale_help import locale_convert | |
30 | from natsort.compat.py23 import py23_str | |
31 | from natsort.compat.locale import ( | |
32 | use_pyicu, | |
33 | null_string, | |
34 | dumb_sort, | |
35 | ) | |
36 | from natsort.compat.fastnumbers import ( | |
37 | fast_float, | |
38 | fast_int, | |
39 | isint, | |
40 | ) | |
41 | from slow_splitters import ( | |
42 | int_splitter, | |
43 | float_splitter, | |
44 | sep_inserter, | |
45 | ) | |
46 | from compat.locale import ( | |
47 | load_locale, | |
48 | get_strxfrm, | |
49 | low, | |
50 | ) | |
51 | from compat.hypothesis import ( | |
52 | assume, | |
53 | given, | |
54 | example, | |
55 | sampled_from, | |
56 | use_hypothesis, | |
57 | ) | |
58 | ||
59 | if sys.version[0] == '3': | |
60 | long = int | |
61 | ||
62 | ichain = chain.from_iterable | |
26 | 63 | |
27 | 64 | |
28 | 65 | def test_do_decoding_decodes_bytes_string_to_unicode(): |
31 | 68 | assert _do_decoding(b'bytes', 'ascii') == b'bytes'.decode('ascii') |
32 | 69 | |
33 | 70 | |
71 | def test_args_to_enum_raises_TypeError_for_invalid_argument(): | |
72 | with raises(TypeError): | |
73 | _args_to_enum(**{'alf': 0}) | |
74 | ||
75 | ||
34 | 76 | def test_args_to_enum_converts_signed_exp_float_to_ns_F(): |
35 | 77 | # number_type, signed, exp, as_path, py3_safe |
36 | assert _args_to_enum(float, True, True, False, False) == ns.F | |
78 | assert _args_to_enum(**{'number_type': float, | |
79 | 'signed': True, | |
80 | 'exp': True}) == ns.F | ns.S | |
37 | 81 | |
38 | 82 | |
39 | 83 | def test_args_to_enum_converts_signed_noexp_float_to_ns_FN(): |
40 | 84 | # number_type, signed, exp, as_path, py3_safe |
41 | assert _args_to_enum(float, True, False, False, False) == ns.F | ns.N | |
85 | assert _args_to_enum(**{'number_type': float, | |
86 | 'signed': True, | |
87 | 'exp': False}) == ns.F | ns.N | ns.S | |
42 | 88 | |
43 | 89 | |
44 | 90 | def test_args_to_enum_converts_unsigned_exp_float_to_ns_FU(): |
45 | 91 | # number_type, signed, exp, as_path, py3_safe |
46 | assert _args_to_enum(float, False, True, False, False) == ns.F | ns.U | |
92 | assert _args_to_enum(**{'number_type': float, | |
93 | 'signed': False, | |
94 | 'exp': True}) == ns.F | ns.U | |
95 | # unsigned is default | |
96 | assert _args_to_enum(**{'number_type': float, | |
97 | 'signed': False, | |
98 | 'exp': True}) == ns.F | |
47 | 99 | |
48 | 100 | |
49 | 101 | def test_args_to_enum_converts_unsigned_unexp_float_to_ns_FNU(): |
50 | 102 | # number_type, signed, exp, as_path, py3_safe |
51 | assert _args_to_enum(float, False, False, False, False) == ns.F | ns.U | ns.N | |
52 | ||
53 | ||
54 | def test_args_to_enum_converts_signed_exp_float_and_path_and_py3safe_to_ns_FPT(): | |
103 | assert _args_to_enum(**{'number_type': float, | |
104 | 'signed': False, | |
105 | 'exp': False}) == ns.F | ns.U | ns.N | |
106 | ||
107 | ||
108 | def test_args_to_enum_converts_float_and_path_and_py3safe_to_ns_FPT(): | |
55 | 109 | # number_type, signed, exp, as_path, py3_safe |
56 | assert _args_to_enum(float, True, True, True, True) == ns.F | ns.P | ns.T | |
57 | ||
58 | ||
59 | def test_args_to_enum_converts_singed_int_and_path_to_ns_IP(): | |
110 | assert _args_to_enum(**{'number_type': float, | |
111 | 'as_path': True, | |
112 | 'py3_safe': True}) == ns.F | ns.P | ns.T | |
113 | ||
114 | ||
115 | def test_args_to_enum_converts_int_and_path_to_ns_IP(): | |
60 | 116 | # number_type, signed, exp, as_path, py3_safe |
61 | assert _args_to_enum(int, True, True, True, False) == ns.I | ns.P | |
117 | assert _args_to_enum(**{'number_type': int, 'as_path': True}) == ns.I | ns.P | |
62 | 118 | |
63 | 119 | |
64 | 120 | def test_args_to_enum_converts_unsigned_int_and_py3safe_to_ns_IUT(): |
65 | 121 | # number_type, signed, exp, as_path, py3_safe |
66 | assert _args_to_enum(int, False, True, False, True) == ns.I | ns.U | ns.T | |
122 | assert _args_to_enum(**{'number_type': int, | |
123 | 'signed': False, | |
124 | 'py3_safe': True}) == ns.I | ns.U | ns.T | |
67 | 125 | |
68 | 126 | |
69 | 127 | def test_args_to_enum_converts_None_to_ns_IU(): |
70 | 128 | # number_type, signed, exp, as_path, py3_safe |
71 | assert _args_to_enum(None, True, True, False, False) == ns.I | ns.U | |
72 | ||
73 | # fttt = (fast_float, True, True, True) | |
74 | # fttf = (fast_float, True, True, False) | |
75 | ftft = (fast_float, True, False, True) | |
76 | ftff = (fast_float, True, False, False) | |
77 | # fftt = (fast_float, False, True, True) | |
78 | ffft = (fast_float, False, False, True) | |
79 | # fftf = (fast_float, False, True, False) | |
80 | ffff = (fast_float, False, False, False) | |
81 | ittt = (fast_int, True, True, True) | |
82 | ittf = (fast_int, True, True, False) | |
83 | itft = (fast_int, True, False, True) | |
84 | itff = (fast_int, True, False, False) | |
85 | # iftt = (fast_int, False, True, True) | |
86 | ifft = (fast_int, False, False, True) | |
87 | # iftf = (fast_int, False, True, False) | |
88 | ifff = (fast_int, False, False, False) | |
89 | ||
90 | ||
91 | def test_number_extracter_raises_TypeError_if_given_a_number(): | |
129 | assert _args_to_enum(**{'number_type': None, | |
130 | 'exp': True}) == ns.I | ns.U | |
131 | ||
132 | float_nosafe_locale_group = (fast_float, False, True, True) | |
133 | float_nosafe_locale_nogroup = (fast_float, False, True, False) | |
134 | float_safe_nolocale_nogroup = (fast_float, True, False, False) | |
135 | float_nosafe_nolocale_group = (fast_float, False, False, True) | |
136 | float_nosafe_nolocale_nogroup = (fast_float, False, False, False) | |
137 | int_safe_locale_group = (fast_int, True, True, True) | |
138 | int_safe_locale_nogroup = (fast_int, True, True, False) | |
139 | int_safe_nolocale_group = (fast_int, True, False, True) | |
140 | int_safe_nolocale_nogroup = (fast_int, True, False, False) | |
141 | int_nosafe_locale_group = (fast_int, False, True, True) | |
142 | int_nosafe_locale_nogroup = (fast_int, False, True, False) | |
143 | int_nosafe_nolocale_group = (fast_int, False, False, True) | |
144 | int_nosafe_nolocale_nogroup = (fast_int, False, False, False) | |
145 | ||
146 | ||
147 | def test_fix_nan_converts_nan_to_negative_infinity_without_NANLAST(): | |
148 | assert _fix_nan((float('nan'),), 0) == (float('-inf'),) | |
149 | assert _fix_nan(('a', 'b', float('nan')), 0) == ('a', 'b', float('-inf')) | |
150 | ||
151 | ||
152 | def test_fix_nan_converts_nan_to_positive_infinity_with_NANLAST(): | |
153 | assert _fix_nan((float('nan'),), ns.NANLAST) == (float('+inf'),) | |
154 | assert _fix_nan(('a', 'b', float('nan')), ns.NANLAST) == ('a', 'b', float('+inf')) | |
155 | ||
156 | ||
157 | # Each test has an "example" version for demonstrative purposes, | |
158 | # and a test that uses the hypothesis module. | |
159 | ||
160 | ||
161 | def test_py3_safe_does_nothing_if_no_numbers_example(): | |
162 | assert _py3_safe(['a', 'b', 'c'], False, isint) == ['a', 'b', 'c'] | |
163 | assert _py3_safe(['a'], False, isint) == ['a'] | |
164 | ||
165 | ||
166 | def test_py3_safe_does_nothing_if_only_one_number_example(): | |
167 | assert _py3_safe(['a', 5], False, isint) == ['a', 5] | |
168 | ||
169 | ||
170 | def test_py3_safe_inserts_empty_string_between_two_numbers_example(): | |
171 | assert _py3_safe([5, 9], False, isint) == [5, '', 9] | |
172 | ||
173 | ||
174 | def test_py3_safe_with_use_locale_inserts_null_string_between_two_numbers_example(): | |
175 | assert _py3_safe([5, 9], True, isint) == [5, null_string, 9] | |
176 | ||
177 | ||
178 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
179 | @given([py23_str, int]) | |
180 | def test_py3_safe_inserts_empty_string_between_two_numbers(x): | |
181 | assume(bool(x)) | |
182 | assert _py3_safe(x, False, isint) == sep_inserter(x, (int, long), '') | |
183 | ||
184 | ||
185 | def test_path_splitter_splits_path_string_by_separator_example(): | |
186 | z = '/this/is/a/path' | |
187 | assert _path_splitter(z) == list(pathlib.Path(z).parts) | |
188 | ||
189 | ||
190 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
191 | @given([sampled_from(string.ascii_letters)]) | |
192 | def test_path_splitter_splits_path_string_by_separator(x): | |
193 | assume(len(x) > 1) | |
194 | assume(all(x)) | |
195 | z = py23_str(pathlib.Path(*x)) | |
196 | assert _path_splitter(z) == list(pathlib.Path(z).parts) | |
197 | ||
198 | ||
199 | def test_path_splitter_splits_path_string_by_separator_and_removes_extension_example(): | |
200 | z = '/this/is/a/path/file.exe' | |
201 | y = list(pathlib.Path(z).parts) | |
202 | assert _path_splitter(z) == y[:-1] + [pathlib.Path(z).stem] + [pathlib.Path(z).suffix] | |
203 | ||
204 | ||
205 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
206 | @given([sampled_from(string.ascii_letters)]) | |
207 | def test_path_splitter_splits_path_string_by_separator_and_removes_extension(x): | |
208 | assume(len(x) > 2) | |
209 | assume(all(x)) | |
210 | z = py23_str(pathlib.Path(*x[:-2])) + '.' + x[-1] | |
211 | y = list(pathlib.Path(z).parts) | |
212 | assert _path_splitter(z) == y[:-1] + [pathlib.Path(z).stem] + [pathlib.Path(z).suffix] | |
213 | ||
214 | ||
215 | def test_number_extracter_raises_TypeError_if_given_a_number_example(): | |
92 | 216 | with raises(TypeError): |
93 | assert _number_extracter(50.0, _float_sign_exp_re, *ffff) | |
94 | ||
95 | ||
96 | def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats(): | |
97 | assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *ffff) == ['a', 5.0, 0.5034] | |
98 | ||
99 | ||
100 | def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats(): | |
101 | assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *ffff) == ['a', 5.0, '+', 0.5034] | |
102 | ||
103 | ||
104 | def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats(): | |
105 | assert _number_extracter('a5+5.034e-1', _float_sign_noexp_re, *ffff) == ['a', 5.0, 5.034, 'e', -1.0] | |
106 | ||
107 | ||
108 | def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats(): | |
109 | assert _number_extracter('a5+5.034e-1', _float_nosign_noexp_re, *ffff) == ['a', 5.0, '+', 5.034, 'e-', 1.0] | |
110 | ||
111 | ||
112 | def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints(): | |
113 | assert _number_extracter('a5+5.034e-1', _int_nosign_re, *ifff) == ['a', 5, '+', 5, '.', 34, 'e-', 1] | |
114 | ||
115 | ||
116 | def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints(): | |
117 | assert _number_extracter('a5+5.034e-1', _int_sign_re, *ifff) == ['a', 5, 5, '.', 34, 'e', -1] | |
118 | ||
119 | ||
120 | def test_number_extracter_inserts_empty_string_between_floats_for_py3safe_option(): | |
121 | assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *ftff) == ['a', 5.0, '', 0.5034] | |
122 | ||
123 | ||
124 | def test_number_extracter_inserts_empty_string_between_ints_for_py3safe_option(): | |
125 | assert _number_extracter('a5+5.034e-1', _int_sign_re, *itff) == ['a', 5, '', 5, '.', 34, 'e', -1] | |
126 | ||
127 | ||
128 | def test_number_extracter_inserts_no_empty_string_py3safe_option_because_no_numbers_are_adjascent(): | |
129 | assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *ftff) == ['a', 5.0, '+', 0.5034] | |
130 | ||
131 | ||
132 | def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number(): | |
133 | assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *ffff) == ['', 6.0, 'a', 5.0, 0.5034] | |
134 | ||
135 | ||
136 | def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number_and_empty_string_between_numbers_for_py3safe(): | |
137 | assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *ftff) == ['', 6.0, 'a', 5.0, '', 0.5034] | |
138 | ||
139 | ||
140 | def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float(): | |
141 | assert _number_extracter('A5+5.034E-1', _float_sign_exp_re, *ffft) == ['aA', 5.0, 0.5034] | |
142 | ||
143 | ||
144 | def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int(): | |
145 | assert _number_extracter('A5+5.034E-1', _int_nosign_re, *ifft) == ['aA', 5, '++', 5, '..', 34, 'eE--', 1] | |
146 | ||
147 | ||
148 | def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale(): | |
149 | locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8')) | |
150 | if use_pyicu: | |
151 | from natsort.locale_help import get_pyicu_transform | |
152 | from locale import getlocale | |
153 | strxfrm = get_pyicu_transform(getlocale()) | |
154 | else: | |
155 | from natsort.locale_help import strxfrm | |
156 | assert _number_extracter('A5+5.034E-1', _int_nosign_re, *ittf) == [strxfrm('A'), 5, strxfrm('+'), 5, strxfrm('.'), 34, strxfrm('E-'), 1] | |
217 | assert _number_extracter(50.0, _float_sign_exp_re, *float_nosafe_nolocale_nogroup) | |
218 | ||
219 | ||
220 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
221 | @given(float) | |
222 | def test_number_extracter_raises_TypeError_if_given_a_number(x): | |
223 | with raises(TypeError): | |
224 | assert _number_extracter(x, _float_sign_exp_re, *float_nosafe_nolocale_nogroup) | |
225 | ||
226 | ||
227 | def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats_example(): | |
228 | assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, 0.5034] | |
229 | ||
230 | ||
231 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
232 | @given([float, py23_str, int]) | |
233 | def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats(x): | |
234 | assume(len(x) <= 10) | |
235 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
236 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
237 | assert _number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, True, True, False, '') | |
238 | ||
239 | ||
240 | def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats_example(): | |
241 | assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, '+', 0.5034] | |
242 | ||
243 | ||
244 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
245 | @given([float, py23_str, int]) | |
246 | def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats(x): | |
247 | assume(len(x) <= 10) | |
248 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
249 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
250 | assert _number_extracter(s, _float_nosign_exp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, False, True, False, '') | |
251 | ||
252 | ||
253 | def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats_example(): | |
254 | assert _number_extracter('a5+5.034e-1', _float_sign_noexp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, 5.034, 'e', -1.0] | |
255 | ||
256 | ||
257 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
258 | @given([float, py23_str, int]) | |
259 | def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats(x): | |
260 | assume(len(x) <= 10) | |
261 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
262 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
263 | assert _number_extracter(s, _float_sign_noexp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, True, False, False, '') | |
264 | ||
265 | ||
266 | def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats_example(): | |
267 | assert _number_extracter('a5+5.034e-1', _float_nosign_noexp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, '+', 5.034, 'e-', 1.0] | |
268 | ||
269 | ||
270 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
271 | @given([float, py23_str, int]) | |
272 | def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats(x): | |
273 | assume(len(x) <= 10) | |
274 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
275 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
276 | assert _number_extracter(s, _float_nosign_noexp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, False, False, False, '') | |
277 | ||
278 | ||
279 | def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints_example(): | |
280 | assert _number_extracter('a5+5.034e-1', _int_nosign_re, *int_nosafe_nolocale_nogroup) == ['a', 5, '+', 5, '.', 34, 'e-', 1] | |
281 | ||
282 | ||
283 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
284 | @given([float, py23_str, int]) | |
285 | @example([10000000000000000000000000000000000000000000000000000000000000000000000000, | |
286 | 100000000000000000000000000000000000000000000000000000000000000000000000000, | |
287 | 100000000000000000000000000000000000000000000000000000000000000000000000000]) | |
288 | def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints(x): | |
289 | assume(len(x) <= 10) | |
290 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
291 | assert _number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup) == int_splitter(s, False, False, '') | |
292 | ||
293 | ||
294 | def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints_example(): | |
295 | assert _number_extracter('a5+5.034e-1', _int_sign_re, *int_nosafe_nolocale_nogroup) == ['a', 5, 5, '.', 34, 'e', -1] | |
296 | ||
297 | ||
298 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
299 | @given([float, py23_str, int]) | |
300 | def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints(x): | |
301 | assume(len(x) <= 10) | |
302 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
303 | assert _number_extracter(s, _int_sign_re, *int_nosafe_nolocale_nogroup) == int_splitter(s, True, False, '') | |
304 | ||
305 | ||
306 | def test_number_extracter_inserts_empty_string_between_floats_for_py3safe_option_example(): | |
307 | assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *float_safe_nolocale_nogroup) == ['a', 5.0, '', 0.5034] | |
308 | ||
309 | ||
310 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
311 | @given([float, py23_str, int]) | |
312 | def test_number_extracter_inserts_empty_string_between_floats_for_py3safe_option(x): | |
313 | assume(len(x) <= 10) | |
314 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
315 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
316 | assert _number_extracter(s, _float_sign_exp_re, *float_safe_nolocale_nogroup) == float_splitter(s, True, True, True, '') | |
317 | ||
318 | ||
319 | def test_number_extracter_inserts_empty_string_between_ints_for_py3safe_option_example(): | |
320 | assert _number_extracter('a5+5.034e-1', _int_sign_re, *int_safe_nolocale_nogroup) == ['a', 5, '', 5, '.', 34, 'e', -1] | |
321 | ||
322 | ||
323 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
324 | @given([float, py23_str, int]) | |
325 | def test_number_extracter_inserts_empty_string_between_ints_for_py3safe_option(x): | |
326 | assume(len(x) <= 10) | |
327 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
328 | assert _number_extracter(s, _int_sign_re, *int_safe_nolocale_nogroup) == int_splitter(s, True, True, '') | |
329 | ||
330 | ||
331 | def test_number_extracter_inserts_no_empty_string_py3safe_option_because_no_numbers_are_adjascent_example(): | |
332 | assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *float_safe_nolocale_nogroup) == ['a', 5.0, '+', 0.5034] | |
333 | ||
334 | ||
335 | def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number_example(): | |
336 | assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *float_nosafe_nolocale_nogroup) == ['', 6.0, 'a', 5.0, 0.5034] | |
337 | ||
338 | ||
339 | def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number_and_empty_string_between_numbers_for_py3safe_exmple(): | |
340 | assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *float_safe_nolocale_nogroup) == ['', 6.0, 'a', 5.0, '', 0.5034] | |
341 | ||
342 | ||
343 | def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float_example(): | |
344 | assert _number_extracter('A5+5.034E-1', _float_sign_exp_re, *float_nosafe_nolocale_group) == ['aA', 5.0, 0.5034] | |
345 | ||
346 | ||
347 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
348 | @given([float, py23_str, int]) | |
349 | def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float(x): | |
350 | assume(len(x) <= 10) | |
351 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
352 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
353 | t = float_splitter(s, True, True, False, '') | |
354 | t = [''.join([low(z) + z for z in y]) if type(y) != float else y for y in t] | |
355 | assert _number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_group) == t | |
356 | ||
357 | ||
358 | def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int_example(): | |
359 | assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nosafe_nolocale_group) == ['aA', 5, '++', 5, '..', 34, 'eE--', 1] | |
360 | ||
361 | ||
362 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
363 | @given([float, py23_str, int]) | |
364 | def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int(x): | |
365 | assume(len(x) <= 10) | |
366 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
367 | t = int_splitter(s, False, False, '') | |
368 | t = [''.join([low(z) + z for z in y]) if type(y) not in (int, long) else y for y in t] | |
369 | assert _number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_group) == t | |
370 | ||
371 | ||
372 | def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale_example(): | |
373 | load_locale('en_US') | |
374 | strxfrm = get_strxfrm() | |
375 | assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nosafe_locale_nogroup) == [strxfrm('A'), 5, strxfrm('+'), 5, strxfrm('.'), 34, strxfrm('E-'), 1] | |
157 | 376 | locale.setlocale(locale.LC_NUMERIC, str('')) |
158 | 377 | |
159 | 378 | |
160 | def test_number_extracter_extracts_numbers_and_strxfrms_letter_doubled_strings_with_use_locale_and_groupletters(): | |
161 | locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8')) | |
162 | if use_pyicu: | |
163 | from natsort.locale_help import get_pyicu_transform | |
164 | from locale import getlocale | |
165 | strxfrm = get_pyicu_transform(getlocale()) | |
166 | else: | |
167 | from natsort.locale_help import strxfrm | |
168 | assert _number_extracter('A5+5.034E-1', _int_nosign_re, *ittt) == [strxfrm('aA'), 5, strxfrm('++'), 5, strxfrm('..'), 34, strxfrm('eE--'), 1] | |
379 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
380 | @given([float, py23_str, int]) | |
381 | def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale(x): | |
382 | assume(len(x) <= 10) | |
383 | load_locale('en_US') | |
384 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
385 | t = int_splitter(s, False, False, null_string) | |
386 | try: # Account for locale bug on Python 3.2 | |
387 | t = [y if i == 0 and y is null_string else locale_convert(y, (fast_int, isint), False) for i, y in enumerate(t)] | |
388 | assert _number_extracter(s, _int_nosign_re, *int_nosafe_locale_nogroup) == t | |
389 | except OverflowError: | |
390 | pass | |
169 | 391 | locale.setlocale(locale.LC_NUMERIC, str('')) |
170 | 392 | |
171 | 393 | |
172 | def test_py3_safe_does_nothing_if_no_numbers(): | |
173 | assert _py3_safe(['a', 'b', 'c'], False) == ['a', 'b', 'c'] | |
174 | assert _py3_safe(['a'], False) == ['a'] | |
175 | ||
176 | ||
177 | def test_py3_safe_does_nothing_if_only_one_number(): | |
178 | assert _py3_safe(['a', 5], False) == ['a', 5] | |
179 | ||
180 | ||
181 | def test_py3_safe_inserts_empty_string_between_two_numbers(): | |
182 | assert _py3_safe([5, 9], False) == [5, '', 9] | |
183 | ||
184 | ||
185 | def test_py3_safe_with_use_locale_inserts_null_string_between_two_numbers(): | |
186 | assert _py3_safe([5, 9], True) == [5, null_string, 9] | |
187 | ||
188 | ||
189 | def test__natsort_key_with_float_splits_input_into_string_and_signed_float_with_exponent(): | |
394 | def test_number_extracter_extracts_numbers_and_strxfrms_letter_doubled_strings_with_use_locale_and_groupletters_example(): | |
395 | load_locale('en_US') | |
396 | strxfrm = get_strxfrm() | |
397 | assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nosafe_locale_group) == [strxfrm('aA'), 5, strxfrm('++'), 5, strxfrm('..'), 34, strxfrm('eE--'), 1] | |
398 | locale.setlocale(locale.LC_NUMERIC, str('')) | |
399 | ||
400 | ||
401 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
402 | @given([float, py23_str, int]) | |
403 | def test_number_extracter_extracts_numbers_and_strxfrms_letter_doubled_strings_with_use_locale_and_groupletters(x): | |
404 | assume(len(x) <= 10) | |
405 | load_locale('en_US') | |
406 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
407 | t = int_splitter(s, False, False, null_string) | |
408 | try: # Account for locale bug on Python 3.2 | |
409 | t = [y if i == 0 and y is null_string else locale_convert(y, (fast_int, isint), True) for i, y in enumerate(t)] | |
410 | assert _number_extracter(s, _int_nosign_re, *int_nosafe_locale_group) == t | |
411 | except OverflowError: | |
412 | pass | |
413 | locale.setlocale(locale.LC_NUMERIC, str('')) | |
414 | ||
415 | ||
416 | def test__natsort_key_with_nan_input_transforms_nan_to_negative_inf(): | |
417 | assert _natsort_key('nan', None, ns.FLOAT) == ('', float('-inf')) | |
418 | assert _natsort_key(float('nan'), None, 0) == ('', float('-inf')) | |
419 | ||
420 | ||
421 | def test__natsort_key_with_nan_input_and_NANLAST_transforms_nan_to_positive_inf(): | |
422 | assert _natsort_key('nan', None, ns.FLOAT | ns.NANLAST) == ('', float('+inf')) | |
423 | assert _natsort_key(float('nan'), None, ns.NANLAST) == ('', float('+inf')) | |
424 | assert ns.NL == ns.NANLAST | |
425 | ||
426 | ||
427 | # The remaining tests provide no examples, just hypothesis tests. | |
428 | # They only confirm that _natsort_key uses the above building blocks. | |
429 | ||
430 | ||
431 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
432 | @given([float, py23_str, int]) | |
433 | def test__natsort_key_with_float_and_signed_splits_input_into_string_and_signed_float_with_exponent(x): | |
434 | assume(len(x) <= 10) | |
435 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
436 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
190 | 437 | assert ns.F == ns.FLOAT |
191 | assert _natsort_key('a-5.034e2', None, ns.F) == ('a', -503.4) | |
192 | ||
193 | ||
194 | def test__natsort_key_with_float_and_noexp_splits_input_into_string_and_signed_float_without_exponent(): | |
195 | assert _natsort_key('a-5.034e2', None, ns.FLOAT | ns.NOEXP) == ('a', -5.034, 'e', 2.0) | |
196 | # Default is to split on floats. | |
197 | assert _natsort_key('a-5.034e2', None, ns.NOEXP) == ('a', -5.034, 'e', 2.0) | |
198 | ||
199 | ||
200 | def test__natsort_key_with_float_and_unsigned_splits_input_into_string_and_unsigned_float(): | |
201 | assert _natsort_key('a-5.034e2', None, ns.UNSIGNED) == ('a-', 503.4) | |
202 | ||
203 | ||
204 | def test__natsort_key_with_float_and_unsigned_and_noexp_splits_input_into_string_and_unsigned_float_without_exponent(): | |
205 | assert _natsort_key('a-5.034e2', None, ns.UNSIGNED | ns.NOEXP) == ('a-', 5.034, 'e', 2.0) | |
206 | ||
207 | ||
208 | def test__natsort_key_with_int_splits_input_into_string_and_signed_int(): | |
209 | assert _natsort_key('a-5.034e2', None, ns.INT) == ('a', -5, '.', 34, 'e', 2) | |
438 | assert ns.S == ns.SIGNED | |
439 | assert _natsort_key(s, None, ns.F | ns.S) == tuple(_number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_nogroup)) | |
440 | ||
441 | ||
442 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
443 | @given([float, py23_str, int]) | |
444 | def test__natsort_key_with_real_splits_input_into_string_and_signed_float_with_exponent(x): | |
445 | assume(len(x) <= 10) | |
446 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
447 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
448 | assert ns.R == ns.F | ns.S | |
449 | assert _natsort_key(s, None, ns.R) == tuple(_number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_nogroup)) | |
450 | ||
451 | ||
452 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
453 | @given([float, py23_str, int]) | |
454 | def test__natsort_key_with_real_matches_signed_float(x): | |
455 | assume(len(x) <= 10) | |
456 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
457 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
458 | assert _natsort_key(s, None, ns.R) == _natsort_key(s, None, ns.F | ns.S) | |
459 | ||
460 | ||
461 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
462 | @given([float, py23_str, int]) | |
463 | def test__natsort_key_with_float_and_signed_and_noexp_splits_input_into_string_and_signed_float_without_exponent(x): | |
464 | assume(len(x) <= 10) | |
465 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
466 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
467 | assert ns.N == ns.NOEXP | |
468 | assert _natsort_key(s, None, ns.F | ns.S | ns.N) == tuple(_number_extracter(s, _float_sign_noexp_re, *float_nosafe_nolocale_nogroup)) | |
469 | ||
470 | ||
471 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
472 | @given([float, py23_str, int]) | |
473 | def test__natsort_key_with_float_and_unsigned_splits_input_into_string_and_unsigned_float(x): | |
474 | assume(len(x) <= 10) | |
475 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
476 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
477 | assert ns.U == ns.UNSIGNED | |
478 | assert _natsort_key(s, None, ns.F | ns.U) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_nolocale_nogroup)) | |
479 | # Default is unsigned search | |
480 | assert _natsort_key(s, None, ns.F) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_nolocale_nogroup)) | |
481 | ||
482 | ||
483 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
484 | @given([float, py23_str, int]) | |
485 | def test__natsort_key_with_float_and_noexp_splits_input_into_string_and_unsigned_float_without_exponent(x): | |
486 | assume(len(x) <= 10) | |
487 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
488 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
489 | assert _natsort_key(s, None, ns.F | ns.N) == tuple(_number_extracter(s, _float_nosign_noexp_re, *float_nosafe_nolocale_nogroup)) | |
490 | ||
491 | ||
492 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
493 | @given([float, py23_str, int]) | |
494 | def test__natsort_key_with_int_splits_input_into_string_and_unsigned_int(x): | |
495 | assume(len(x) <= 10) | |
496 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
497 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
498 | assert ns.I == ns.INT | |
499 | assert _natsort_key(s, None, ns.INT) == tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
500 | # Default is int search | |
501 | assert _natsort_key(s, None, ns.NOEXP) == tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
210 | 502 | # NOEXP is ignored for integers |
211 | assert _natsort_key('a-5.034e2', None, ns.INT | ns.NOEXP) == ('a', -5, '.', 34, 'e', 2) | |
212 | ||
213 | ||
214 | def test__natsort_key_with_int_splits_and_unsigned_input_into_string_and_unsigned_int(): | |
215 | assert _natsort_key('a-5.034e2', None, ns.INT | ns.UNSIGNED) == ('a-', 5, '.', 34, 'e', 2) | |
216 | ||
217 | ||
218 | def test__natsort_key_with_version_or_digit_matches_usigned_int(): | |
219 | assert _natsort_key('a-5.034e2', None, ns.VERSION) == _natsort_key('a-5.034e2', None, ns.INT | ns.UNSIGNED) | |
220 | assert _natsort_key('a-5.034e2', None, ns.DIGIT) == _natsort_key('a-5.034e2', None, ns.VERSION) | |
221 | ||
222 | ||
223 | def test__natsort_key_with_key_applies_key_function_before_splitting(): | |
224 | assert _natsort_key('a-5.034e2', lambda x: x.upper(), ns.F) == ('A', -503.4) | |
225 | ||
226 | ||
227 | def test__natsort_key_with_tuple_input_returns_nested_tuples(): | |
503 | assert _natsort_key(s, None, ns.I | ns.NOEXP) == tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
504 | ||
505 | ||
506 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
507 | @given([float, py23_str, int]) | |
508 | def test__natsort_key_with_int_splits_and_signed_input_into_string_and_signed_int(x): | |
509 | assume(len(x) <= 10) | |
510 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
511 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
512 | assert _natsort_key(s, None, ns.INT | ns.SIGNED) == tuple(_number_extracter(s, _int_sign_re, *int_nosafe_nolocale_nogroup)) | |
513 | assert _natsort_key(s, None, ns.SIGNED) == tuple(_number_extracter(s, _int_sign_re, *int_nosafe_nolocale_nogroup)) | |
514 | ||
515 | ||
516 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
517 | @given([float, py23_str, int]) | |
518 | def test__natsort_key_with_version_or_digit_matches_usigned_int(x): | |
519 | assume(len(x) <= 10) | |
520 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
521 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
522 | assert _natsort_key(s, None, ns.VERSION) == _natsort_key(s, None, ns.INT | ns.UNSIGNED) | |
523 | assert _natsort_key(s, None, ns.DIGIT) == _natsort_key(s, None, ns.VERSION) | |
524 | ||
525 | ||
526 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
527 | @given([float, py23_str, int]) | |
528 | def test__natsort_key_with_key_applies_key_function_before_splitting(x): | |
529 | assume(len(x) <= 10) | |
530 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
531 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
532 | assert _natsort_key(s, lambda x: x.upper(), ns.I) == tuple(_number_extracter(s.upper(), _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
533 | ||
534 | ||
535 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
536 | @given([float, py23_str, int]) | |
537 | def test__natsort_key_with_tuple_input_returns_nested_tuples(x): | |
228 | 538 | # Iterables are parsed recursively so you can sort lists of lists. |
229 | assert _natsort_key(('a1', 'a-5.034e2'), None, ns.V) == (('a', 1), ('a-', 5, '.', 34, 'e', 2)) | |
230 | ||
231 | ||
232 | def test__natsort_key_with_tuple_input_but_itemgetter_key_returns_split_second_element(): | |
539 | assume(len(x) <= 10) | |
540 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
541 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
542 | t = tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
543 | assert _natsort_key((s, s), None, ns.I) == (t, t) | |
544 | ||
545 | ||
546 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
547 | @given([float, py23_str, int]) | |
548 | def test__natsort_key_with_tuple_input_but_itemgetter_key_returns_split_second_element(x): | |
233 | 549 | # A key is applied before recursion, but not in the recursive calls. |
234 | assert _natsort_key(('a1', 'a-5.034e2'), itemgetter(1), ns.F) == ('a', -503.4) | |
235 | ||
236 | ||
237 | def test__natsort_key_with_input_containing_leading_numbers_returns_leading_empty_strings(): | |
238 | # Strings that lead with a number get an empty string at the front of the tuple. | |
239 | # This is designed to get around the "unorderable types" issue. | |
240 | assert _natsort_key(('15a', '6'), None, ns.F) == (('', 15.0, 'a'), ('', 6.0)) | |
241 | ||
242 | ||
243 | def test__natsort_key_with_numeric_input_returns_number_with_leading_empty_string(): | |
244 | assert _natsort_key(10, None, ns.F) == ('', 10) | |
245 | ||
246 | ||
247 | def test__natsort_key_with_absolute_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_root_and_split_extensions(): | |
248 | # Turn on PATH to split a file path into components | |
249 | assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', None, ns.PATH) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) | |
250 | ||
251 | ||
252 | def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_relative_parent_and_split_extensions(): | |
253 | assert _natsort_key('../Folder (10)/file (2).tar.gz', None, ns.PATH) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) | |
254 | ||
255 | ||
256 | def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_and_split_extensions(): | |
257 | assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', None, ns.PATH) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) | |
258 | ||
259 | ||
260 | def test__natsort_key_with_pathlib_intput_and_PATH_returns_nested_tuples(): | |
261 | # Converts pathlib PurePath (and subclass) objects to string before sorting | |
262 | if has_pathlib: | |
263 | assert _natsort_key(pathlib.Path('../Folder (10)/file (2).tar.gz'), None, ns.PATH) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) | |
264 | ||
265 | ||
266 | def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple(): | |
267 | # It gracefully handles as_path for numeric input by putting an extra tuple around it | |
268 | # so it will sort against the other as_path results. | |
269 | assert _natsort_key(10, None, ns.PATH) == (('', 10),) | |
270 | ||
271 | ||
272 | def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple(): | |
273 | # PATH also handles recursion well. | |
274 | assert _natsort_key(('/Folder', '/Folder (1)'), None, ns.PATH) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1.0, ')'))) | |
275 | ||
276 | ||
277 | def test__natsort_key_with_TYPESAFE_inserts_spaces_between_numbers(): | |
550 | assume(len(x) <= 10) | |
551 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
552 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
553 | t = tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
554 | assert _natsort_key((s, s), itemgetter(1), ns.I) == t | |
555 | ||
556 | ||
557 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
558 | @given(float) | |
559 | def test__natsort_key_with_numeric_input_returns_number_with_leading_empty_string(x): | |
560 | assume(not isnan(x)) | |
561 | if x.is_integer(): | |
562 | x = int(x) | |
563 | assert _natsort_key(x, None, ns.I) == ('', x) | |
564 | ||
565 | ||
566 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
567 | @given([float, py23_str, int]) | |
568 | def test__natsort_key_with_TYPESAFE_inserts_spaces_between_numbers(x): | |
278 | 569 | # Turn on TYPESAFE to put a '' between adjacent numbers |
279 | assert _natsort_key('43h7+3', None, ns.TYPESAFE) == ('', 43.0, 'h', 7.0, '', 3.0) | |
570 | assume(len(x) <= 10) | |
571 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
572 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
573 | assert _natsort_key(s, None, ns.TYPESAFE | ns.S) == tuple(_number_extracter(s, _int_sign_re, *int_safe_nolocale_nogroup)) | |
280 | 574 | |
281 | 575 | |
282 | 576 | def test__natsort_key_with_invalid_alg_input_raises_ValueError(): |
286 | 580 | assert str(err.value) == "_natsort_key: 'alg' argument must be from the enum 'ns', got 1" |
287 | 581 | |
288 | 582 | |
289 | def test__natsort_key_without_string_modifiers_leaves_text_as_is(): | |
290 | # Changing the sort order of strings | |
291 | assert _natsort_key('Apple56', None, ns.F) == ('Apple', 56.0) | |
292 | ||
293 | ||
294 | def test__natsort_key_with_IGNORECASE_lowercases_text(): | |
295 | assert _natsort_key('Apple56', None, ns.IGNORECASE) == ('apple', 56.0) | |
296 | ||
297 | ||
298 | def test__natsort_key_with_LOWERCASEFIRST_inverts_text_case(): | |
299 | assert _natsort_key('Apple56', None, ns.LOWERCASEFIRST) == ('aPPLE', 56.0) | |
300 | ||
301 | ||
302 | def test__natsort_key_with_GROUPLETTERS_doubles_text_with_lowercase_letter_first(): | |
303 | assert _natsort_key('Apple56', None, ns.GROUPLETTERS) == ('aAppppllee', 56.0) | |
304 | ||
305 | ||
306 | def test__natsort_key_with_GROUPLETTERS_and_LOWERCASEFIRST_inverts_text_first_then_doubles_letters_with_lowercase_letter_first(): | |
307 | assert _natsort_key('Apple56', None, ns.G | ns.LF) == ('aapPpPlLeE', 56.0) | |
583 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
584 | @given([float, py23_str, int]) | |
585 | def test__natsort_key_with_IGNORECASE_lowercases_text(x): | |
586 | assume(len(x) <= 10) | |
587 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
588 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
589 | try: | |
590 | assert _natsort_key(s, None, ns.IGNORECASE) == tuple(_number_extracter(s.casefold(), _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
591 | except AttributeError: | |
592 | assert _natsort_key(s, None, ns.IGNORECASE) == tuple(_number_extracter(s.lower(), _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
593 | ||
594 | ||
595 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
596 | @given([float, py23_str, int]) | |
597 | def test__natsort_key_with_LOWERCASEFIRST_inverts_text_case(x): | |
598 | assume(len(x) <= 10) | |
599 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
600 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
601 | assert _natsort_key(s, None, ns.LOWERCASEFIRST) == tuple(_number_extracter(s.swapcase(), _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
602 | ||
603 | ||
604 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
605 | @given([float, py23_str, int]) | |
606 | def test__natsort_key_with_GROUPLETTERS_doubles_text_with_lowercase_letter_first(x): | |
607 | assume(len(x) <= 10) | |
608 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
609 | s = ''.join(ichain([repr(y)] if type(y) in (float, long, int) else [low(y), y] for y in x)) | |
610 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
611 | t = _number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup) | |
612 | assert _natsort_key(s, None, ns.GROUPLETTERS) == tuple(''.join(low(z) + z for z in y) if type(y) not in (float, long, int) else y for y in t) | |
613 | ||
614 | ||
615 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
616 | @given([float, py23_str, int]) | |
617 | def test__natsort_key_with_GROUPLETTERS_and_LOWERCASEFIRST_inverts_text_first_then_doubles_letters_with_lowercase_letter_first(x): | |
618 | assume(len(x) <= 10) | |
619 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
620 | s = ''.join(ichain([repr(y)] if type(y) in (float, long, int) else [low(y), y] for y in x)) | |
621 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
622 | t = _number_extracter(s.swapcase(), _int_nosign_re, *int_nosafe_nolocale_nogroup) | |
623 | assert _natsort_key(s, None, ns.G | ns.LF) == tuple(''.join(low(z) + z for z in y) if type(y) not in (float, long, int) else y for y in t) | |
308 | 624 | |
309 | 625 | |
310 | 626 | def test__natsort_key_with_bytes_input_only_applies_LOWERCASEFIRST_or_IGNORECASE_and_returns_in_tuple(): |
317 | 633 | assert True |
318 | 634 | |
319 | 635 | |
320 | def test__natsort_key_with_LOCALE_transforms_floats_according_to_the_current_locale_and_strxfrms_strings(): | |
636 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
637 | @given([float, py23_str, int]) | |
638 | def test__natsort_key_with_LOCALE_transforms_floats_according_to_the_current_locale_and_strxfrms_strings(x): | |
321 | 639 | # Locale aware sorting |
322 | locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8')) | |
323 | if use_pyicu: | |
324 | from natsort.locale_help import get_pyicu_transform | |
325 | from locale import getlocale | |
326 | strxfrm = get_pyicu_transform(getlocale()) | |
640 | assume(len(x) <= 10) | |
641 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
642 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
643 | load_locale('en_US') | |
644 | if dumb_sort(): | |
645 | assert _natsort_key(s, None, ns.LOCALE | ns.F) == tuple(_number_extracter(s.swapcase(), _float_nosign_exp_re, *float_nosafe_locale_group)) | |
327 | 646 | else: |
328 | from natsort.locale_help import strxfrm | |
329 | assert _natsort_key('Apple56.5', None, ns.LOCALE) == (strxfrm('Apple'), 56.5) | |
330 | assert _natsort_key('Apple56,5', None, ns.LOCALE) == (strxfrm('Apple'), 56.0, strxfrm(','), 5.0) | |
331 | ||
332 | locale.setlocale(locale.LC_NUMERIC, str('de_DE.UTF-8')) | |
333 | if use_pyicu: | |
334 | strxfrm = get_pyicu_transform(getlocale()) | |
335 | assert _natsort_key('Apple56.5', None, ns.LOCALE) == (strxfrm('Apple'), 56.5) | |
336 | assert _natsort_key('Apple56,5', None, ns.LOCALE) == (strxfrm('Apple'), 56.5) | |
647 | assert _natsort_key(s, None, ns.LOCALE | ns.F) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_locale_nogroup)) | |
337 | 648 | locale.setlocale(locale.LC_NUMERIC, str('')) |
338 | 649 | |
339 | 650 | |
340 | def test__natsort_key_with_LOCALE_and_UNGROUPLETTERS_places_space_before_string_with_capital_first_letter(): | |
651 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
652 | @given([float, py23_str, int]) | |
653 | def test__natsort_key_with_LOCALE_and_UNGROUPLETTERS_places_space_before_string_with_capital_first_letter(x): | |
341 | 654 | # Locale-aware sorting |
342 | locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8')) | |
343 | if use_pyicu: | |
344 | from natsort.locale_help import get_pyicu_transform | |
345 | from locale import getlocale | |
346 | strxfrm = get_pyicu_transform(getlocale()) | |
655 | assume(len(x) <= 10) | |
656 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
657 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
658 | load_locale('en_US') | |
659 | if dumb_sort(): | |
660 | t = tuple(_number_extracter(s.swapcase(), _float_nosign_exp_re, *float_nosafe_locale_group)) | |
347 | 661 | else: |
348 | from natsort.locale_help import strxfrm | |
349 | assert _natsort_key('Apple56.5', None, ns.LOCALE | ns.UNGROUPLETTERS | ns.F) == (strxfrm(' Apple'), 56.5) | |
350 | assert _natsort_key('apple56.5', None, ns.LOCALE | ns.UNGROUPLETTERS | ns.F) == (strxfrm('apple'), 56.5) | |
351 | assert _natsort_key('12Apple56.5', None, ns.LOCALE | ns.UNGROUPLETTERS | ns.F) == (null_string, 12.0, strxfrm('Apple'), 56.5) | |
662 | t = tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_locale_nogroup)) | |
663 | if not t: | |
664 | r = (t, t) | |
665 | elif t[0] is null_string: | |
666 | r = ((b'' if use_pyicu else '',), t) | |
667 | else: | |
668 | r = ((s[0],), t) | |
669 | assert _natsort_key(s, None, ns.LOCALE | ns.UNGROUPLETTERS | ns.F) == r | |
352 | 670 | # The below are all aliases for UNGROUPLETTERS |
353 | 671 | assert ns.UNGROUPLETTERS == ns.UG |
354 | 672 | assert ns.UNGROUPLETTERS == ns.CAPITALFIRST |
356 | 674 | locale.setlocale(locale.LC_NUMERIC, str('')) |
357 | 675 | |
358 | 676 | |
359 | def test__natsort_key_with_UNGROUPLETTERS_does_nothing_without_LOCALE(): | |
360 | assert _natsort_key('Apple56.5', None, ns.UG | ns.I) == _natsort_key('Apple56.5', None, ns.I) | |
677 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
678 | @given([float, py23_str, int]) | |
679 | def test__natsort_key_with_UNGROUPLETTERS_does_nothing_without_LOCALE(x): | |
680 | assume(len(x) <= 10) | |
681 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
682 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
683 | assert _natsort_key(s, None, ns.UG | ns.I) == _natsort_key(s, None, ns.I) | |
684 | ||
685 | ||
686 | # It is difficult to write code that generates random filesystem paths, | |
687 | # so example-based tests are used for the PATH option instead. | |
688 | ||
689 | ||
690 | def test__natsort_key_with_absolute_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_root_and_split_extensions(): | |
691 | # Turn on PATH to split a file path into components | |
692 | assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', None, ns.PATH | ns.F) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) | |
693 | ||
694 | ||
695 | def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_relative_parent_and_split_extensions(): | |
696 | assert _natsort_key('../Folder (10)/file (2).tar.gz', None, ns.PATH | ns.F) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) | |
697 | ||
698 | ||
699 | def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_and_split_extensions(): | |
700 | assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', None, ns.PATH | ns.F) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) | |
701 | ||
702 | ||
703 | def test__natsort_key_with_pathlib_intput_and_PATH_returns_nested_tuples(): | |
704 | # Converts pathlib PurePath (and subclass) objects to string before sorting | |
705 | assert _natsort_key(pathlib.Path('../Folder (10)/file (2).tar.gz'), None, ns.PATH | ns.F) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) | |
706 | ||
707 | ||
708 | def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple(): | |
709 | # With PATH, numeric input is handled gracefully by putting an extra tuple around it | |
710 | # so that it will sort against the other as_path results. | |
711 | assert _natsort_key(10, None, ns.PATH) == (('', 10),) | |
712 | ||
713 | ||
714 | def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple(): | |
715 | # PATH also handles recursion well. | |
716 | assert _natsort_key(('/Folder', '/Folder (1)'), None, ns.PATH) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1, ')'))) |
0 | # Tox (http://tox.testrun.org/) is a tool for running tests | |
1 | # in multiple virtualenvs. This configuration file will run the | |
2 | # test suite on all supported python versions. To use it, "pip install tox" | |
3 | # and then run "tox" from this directory. | |
4 | ||
5 | [tox] | |
6 | envlist = | |
7 | py26, py27, py32, py33, py34, pypy | |
8 | ||
9 | [testenv] | |
10 | commands = {envpython} setup.py test | |
11 | deps = pytest |