Imported Upstream version 3.5.1
Agustin Henze
9 years ago
7 | 7 | raise AssertionError |
8 | 8 | raise NotImplementedError |
9 | 9 | raise$ |
10 | ||
11 | # Don't complain about alternate imports | |
12 | except ImportError | |
13 | 10 | |
14 | 11 | # Don't complain if non-runnable code isn't run: |
15 | 12 | if 0: |
5 | 5 | - 3.3 |
6 | 6 | - 3.4 |
7 | 7 | env: |
8 | - WITH_FASTNUMBERS=true | |
9 | - WITH_FASTNUMBERS=false | |
8 | - WITH_OPTIONS=true | |
9 | - WITH_OPTIONS=false | |
10 | before_install: | |
11 | - sudo apt-get update | |
12 | - sudo locale-gen de_DE.UTF-8 | |
10 | 13 | install: |
14 | - if [[ $WITH_OPTIONS == true ]]; then sudo apt-get install libicu-dev; fi | |
15 | - if [[ $WITH_OPTIONS == true ]]; then pip install fastnumbers; fi | |
16 | - if [[ $WITH_OPTIONS == true ]]; then pip install PyICU; fi | |
17 | - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install argparse; fi | |
11 | 18 | - pip install pytest-cov pytest-flakes pytest-pep8 |
12 | 19 | - pip install coveralls |
13 | - if [[ $WITH_FASTNUMBERS == true ]]; then pip install fastnumbers; fi | |
14 | - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install argparse; fi | |
15 | 20 | script: |
16 | 21 | - python -m pytest --cov natsort --flakes --pep8 |
17 | 22 | - python -m pytest --doctest-modules natsort |
4 | 4 | include natsort/__main__.py |
5 | 5 | include natsort/__init__.py |
6 | 6 | include natsort/py23compat.py |
7 | include natsort/locale_help.py | |
8 | include natsort/fake_fastnumbers.py | |
7 | 9 | include test_natsort/profile_natsorted.py |
8 | 10 | include test_natsort/stress_natsort.py |
9 | 11 | include test_natsort/test_natsort.py |
12 | include test_natsort/test_locale_help.py | |
13 | include test_natsort/test_fake_fastnumbers.py | |
10 | 14 | include test_natsort/test_main.py |
11 | 15 | include setup.py |
12 | 16 | include setup.cfg |
48 | 48 | >>> natsorted(a) # natsorted tries to sort as signed floats, so it won't work |
49 | 49 | ['version-2.0', 'version-1.9', 'version-1.11', 'version-1.10'] |
50 | 50 | |
51 | You can also perform locale-aware sorting (or "human sorting"), where the | |
52 | non-numeric characters are ordered based on their meaning, not on their | |
53 | ordinal value; this can be achieved with the ``humansorted`` function:: | |
54 | ||
55 | >>> a = ['Apple', 'Banana', 'apple', 'banana'] | |
56 | >>> natsorted(a) | |
57 | ['Apple', 'Banana', 'apple', 'banana'] | |
58 | >>> import locale | |
59 | >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') | |
60 | 'en_US.UTF-8' | |
61 | >>> from natsort import humansorted | |
62 | >>> humansorted(a) | |
63 | ['apple', 'Apple', 'banana', 'Banana'] | |
64 | ||
65 | You may find you need to explicitly set the locale to get this to work | |
66 | (as shown in the example). | |
67 | Please see the `following caveat <http://pythonhosted.org//natsort/examples.html#bug-note>`_ | |
68 | and the "Optional Dependencies" section | |
69 | below before using the ``humansorted`` function. | |
70 | ||
51 | 71 | You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types |
52 | 72 | when you sort:: |
53 | 73 | |
60 | 80 | The natsort algorithm does other fancy things like |
61 | 81 | |
62 | 82 | - recursively descend into lists of lists |
83 | - control the case-sensitivity | |
63 | 84 | - sort file paths correctly |
64 | 85 | - allow custom sorting keys |
65 | 86 | - exposes a natsort_key generator to pass to list.sort |
72 | 93 | |
73 | 94 | ``natsort`` comes with a shell script called ``natsort``, or can also be called |
74 | 95 | from the command line with ``python -m natsort``. The command line script is |
75 | only installed onto your ``PATH`` if you don't install via a wheel. There is | |
76 | apparently a known bug with the wheel installation process that will not create | |
77 | entry points. | |
96 | only installed onto your ``PATH`` if you don't install via a wheel. | |
78 | 97 | |
79 | 98 | Requirements |
80 | 99 | ------------ |
83 | 102 | (this includes python 3.x). To run version 2.6, 3.0, or 3.1 the |
84 | 103 | `argparse <https://pypi.python.org/pypi/argparse>`_ module is required. |
85 | 104 | |
86 | Optional Dependency | |
87 | ------------------- | |
105 | Optional Dependencies | |
106 | --------------------- | |
107 | ||
108 | fastnumbers | |
109 | ''''''''''' | |
88 | 110 | |
89 | 111 | The most efficient sorting can occur if you install the |
90 | 112 | `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ package (it helps |
91 | 113 | with the string to number conversions.) ``natsort`` will still run (efficiently) |
92 | 114 | without the package, but if you need to squeeze out that extra juice it is |
93 | 115 | recommended you include this as a dependency. ``natsort`` will not require (or |
94 | check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed. | |
116 | check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed | |
117 | at installation. | |
118 | ||
119 | PyICU | |
120 | ''''' | |
121 | ||
122 | On some systems, Python's ``locale`` library can be buggy (I have found this to be | |
123 | the case on Mac OS X), so ``natsort`` will use | |
124 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ under the hood if it is installed | |
125 | on your computer; this will give more reliable results. ``natsort`` will not | |
126 | require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed | |
127 | at installation. | |
95 | 128 | |
96 | 129 | Deprecation Notices |
97 | 130 | -------------------- |
98 | 131 | |
132 | - In ``natsort`` version 4.0.0, the ``number_type``, ``signed``, ``exp``, | |
133 | ``as_path``, and ``py3_safe`` options will be removed from the (documented) | |
134 | API, in favor of the ``alg`` option and ``ns`` enum. They will remain as | |
135 | keyword-only arguments after that (for the foreseeable future). | |
99 | 136 | - In ``natsort`` version 4.0.0, the ``natsort_key`` function will be removed |
100 | 137 | from the public API. All future development should use ``natsort_keygen`` |
101 | 138 | in preparation for this. |
117 | 154 | These are the last three entries of the changelog. See the package documentation |
118 | 155 | for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_. |
119 | 156 | |
157 | 09-25-2014 v. 3.5.1 | |
158 | ''''''''''''''''''' | |
159 | ||
160 | - Fixed bug that caused list/tuples to fail when using 'ns.LOWERCASEFIRST' | |
161 | or 'ns.IGNORECASE'. | |
162 | - Refactored modules so that only the public API was in natsort.py and | |
163 | ns_enum.py. | |
164 | - Refactored all import statements to be absolute, not relative. | |
165 | ||
166 | 09-02-2014 v. 3.5.0 | |
167 | ''''''''''''''''''' | |
168 | ||
169 | - Added the 'alg' argument to the 'natsort' functions. This argument | |
170 | accepts an enum that is used to indicate the options the user wishes | |
171 | to use. The 'number_type', 'signed', 'exp', 'as_path', and 'py3_safe' | |
172 | options are being deprecated and will become (undocumented) | |
173 | keyword-only options in natsort version 4.0.0. | |
174 | - The user can now modify how 'natsort' handles the case of non-numeric | |
175 | characters. | |
176 | - The user can now instruct 'natsort' to use locale-aware sorting, which | |
177 | allows 'natsort' to perform true "human sorting". | |
178 | ||
179 | - The `humansorted` convenience function has been included to make this | |
180 | easier. | |
181 | ||
182 | - Updated shell script with locale functionality. | |
183 | ||
120 | 184 | 08-12-2014 v. 3.4.1 |
121 | 185 | ''''''''''''''''''' |
122 | 186 | |
125 | 189 | enhancements. |
126 | 190 | - Made documentation point to more 'natsort' resources, and also added a |
127 | 191 | new example in the examples section. |
128 | ||
129 | 07-19-2014 v. 3.4.0 | |
130 | ''''''''''''''''''' | |
131 | ||
132 | - Fixed a bug that caused user's options to the 'natsort_key' to not be | |
133 | passed on to recursive calls of 'natsort_key'. | |
134 | - Added a 'natsort_keygen' function that will generate a wrapped version | |
135 | of 'natsort_key' that is easier to call. 'natsort_key' is now set to | |
136 | depreciate at natsort version 4.0.0. | |
137 | - Added an 'as_path' option to 'natsorted' & co. that will try to treat | |
138 | input strings as filepaths. This will help yield correct results for | |
139 | OS-generated inputs like | |
140 | ``['/p/q/o.x', '/p/q (1)/o.x', '/p/q (10)/o.x', '/p/q/o (1).x']``. | |
141 | - Massive performance enhancements for string input (1.8x-2.0x), at the expense | |
142 | of reduction in speed for numeric input (~2.0x). | |
143 | ||
144 | - This is a good compromise because the most common input will be strings, | |
145 | not numbers, and sorting numbers still only takes 0.6x the time of sorting | |
146 | strings. If you are sorting only numbers, you would use 'sorted' anyway. | |
147 | ||
148 | - Added the 'order_by_index' function to help in using the output of | |
149 | 'index_natsorted' and 'index_versorted'. | |
150 | - Added the 'reverse' option to 'natsorted' & co. to make it's API more | |
151 | similar to the builtin 'sorted'. | |
152 | - Added more unit tests. | |
153 | - Added auxiliary test code that helps in profiling and stress-testing. | |
154 | - Reworked the documentation, moving most of it to PyPI's hosting platform. | |
155 | - Added support for coveralls.io. | |
156 | - Entire codebase is now PyFlakes and PEP8 compliant. | |
157 | ||
158 | 06-28-2014 v. 3.3.0 | |
159 | ''''''''''''''''''' | |
160 | ||
161 | - Added a 'versorted' method for more convenient sorting of versions. | |
162 | - Updated command-line tool --number_type option with 'version' and 'ver' | |
163 | to make it more clear how to sort version numbers. | |
164 | - Moved unit-testing mechanism from being docstring-based to actual unit tests | |
165 | in actual functions. | |
166 | ||
167 | - This has provided the ability determine the coverage of the unit tests (99%). | |
168 | - This also makes the pydoc documentation a bit more clear. | |
169 | ||
170 | - Made docstrings for public functions mirror the README API. | |
171 | - Connected natsort development to Travis-CI to help ensure quality releases. |
12 | 12 | natsort_key.rst |
13 | 13 | natsorted.rst |
14 | 14 | versorted.rst |
15 | humansorted.rst | |
15 | 16 | index_natsorted.rst |
16 | 17 | index_versorted.rst |
18 | index_humansorted.rst | |
17 | 19 | order_by_index.rst |
20 | ns_class.rst |
1 | 1 | |
2 | 2 | Changelog |
3 | 3 | --------- |
4 | ||
5 | 09-25-2014 v. 3.5.1 | |
6 | ''''''''''''''''''' | |
7 | ||
8 | - Fixed bug that caused list/tuples to fail when using 'ns.LOWERCASEFIRST' | |
9 | or 'ns.IGNORECASE'. | |
10 | - Refactored modules so that only the public API was in natsort.py and | |
11 | ns_enum.py. | |
12 | - Refactored all import statements to be absolute, not relative. | |
13 | ||
14 | ||
15 | 09-02-2014 v. 3.5.0 | |
16 | ''''''''''''''''''' | |
17 | ||
18 | - Added the 'alg' argument to the 'natsort' functions. This argument | |
19 | accepts an enum that is used to indicate the options the user wishes | |
20 | to use. The 'number_type', 'signed', 'exp', 'as_path', and 'py3_safe' | |
21 | options are being deprecated and will become (undocumented) | |
22 | keyword-only options in natsort version 4.0.0. | |
23 | - The user can now modify how 'natsort' handles the case of non-numeric | |
24 | characters. | |
25 | - The user can now instruct 'natsort' to use locale-aware sorting, which | |
26 | allows 'natsort' to perform true "human sorting". | |
27 | ||
28 | - The `humansorted` convenience function has been included to make this | |
29 | easier. | |
30 | ||
31 | - Updated shell script with locale functionality. | |
4 | 32 | |
5 | 33 | 08-12-2014 v. 3.4.1 |
6 | 34 | ''''''''''''''''''' |
42 | 42 | # ones. |
43 | 43 | extensions = [ |
44 | 44 | 'sphinx.ext.autodoc', |
45 | 'sphinx.ext.autosummary', | |
45 | 46 | 'sphinx.ext.intersphinx', |
46 | 47 | 'numpydoc', |
47 | 48 | ] |
17 | 17 | >>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300'] |
18 | 18 | >>> sorted(a) |
19 | 19 | ['a5.034e1', 'a50', 'a50.300', 'a50.4', 'a51.'] |
20 | >>> from natsort import natsorted | |
20 | >>> from natsort import natsorted, ns | |
21 | 21 | >>> natsorted(a) |
22 | 22 | ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.'] |
23 | ||
24 | Customizing Float Definition | |
25 | ---------------------------- | |
26 | ||
27 | By default :func:`~natsorted` searches for any float that would be | |
28 | a valid Python float literal, such as 5, 0.4, -4.78, +4.2E-34, etc. | |
29 | Perhaps you don't want to search for signed numbers, or you don't | |
30 | want to search for exponential notation, and the ``signed`` and | |
31 | ``exp`` options allow you to do this:: | |
32 | ||
33 | >>> a = ['a50', 'a51.', 'a+50.4', 'a5.034e1', 'a+50.300'] | |
34 | >>> natsorted(a) | |
35 | ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] | |
36 | >>> natsorted(a, signed=False) | |
37 | ['a50', 'a5.034e1', 'a51.', 'a+50.300', 'a+50.4'] | |
38 | >>> natsorted(a, exp=False) | |
39 | ['a5.034e1', 'a50', 'a+50.300', 'a+50.4', 'a51.'] | |
40 | 23 | |
41 | 24 | Sort Version Numbers |
42 | 25 | -------------------- |
48 | 31 | >>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1'] |
49 | 32 | >>> natsorted(a) # This gives incorrect results |
50 | 33 | ['ver-2.9.9a', 'ver-2.9.9b', 'ver-1.11', 'ver-1.11.4', 'ver-1.10.1'] |
51 | >>> natsorted(a, number_type=int, signed=False) | |
52 | ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] | |
53 | >>> natsorted(a, number_type=None) | |
34 | >>> natsorted(a, alg=ns.INT | ns.UNSIGNED) | |
35 | ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] | |
36 | >>> natsorted(a, alg=ns.VERSION) | |
54 | 37 | ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] |
55 | 38 | >>> from natsort import versorted |
56 | 39 | >>> versorted(a) |
57 | 40 | ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] |
58 | 41 | |
59 | You can see that ``number_type=None`` is a shortcut for ``number_type=int`` | |
60 | and ``signed=False``, and the :func:`~versorted` is a shortcut for | |
61 | ``natsorted(number_type=None)``. The recommend manner to sort version | |
42 | You can see that ``alg=ns.VERSION`` is a shortcut for | |
43 | ``alg=ns.INT | ns.UNSIGNED``, and the :func:`~versorted` is a shortcut for | |
44 | ``natsorted(alg=ns.VERSION)``. The recommended manner to sort version | |
62 | 45 | numbers is to use :func:`~versorted`. |
63 | 46 | |
64 | 47 | Sorting with Alpha, Beta, and Release Candidates |
67 | 50 | By default, if you wish to sort versions with a non-strict versioning |
68 | 51 | scheme, you may not get the results you expect:: |
69 | 52 | |
70 | >>> a = ['1.2', '1.2rc1', '1.2beta2', '1.2beta', '1.2alpha', '1.2.1', '1.1', '1.3'] | |
53 | >>> a = ['1.2', '1.2rc1', '1.2beta2', '1.2beta1', '1.2alpha', '1.2.1', '1.1', '1.3'] | |
71 | 54 | >>> versorted(a) |
72 | ['1.1', '1.2', '1.2.1', '1.2alpha', '1.2beta', '1.2beta2', '1.2rc1', '1.3'] | |
55 | ['1.1', '1.2', '1.2.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.3'] | |
73 | 56 | |
74 | 57 | To make the '1.2' pre-releases come before '1.2.1', you need to use the following |
75 | 58 | recipe:: |
76 | 59 | |
77 | 60 | >>> versorted(a, key=lambda x: x.replace('.', '~')) |
78 | ['1.1', '1.2', '1.2alpha', '1.2beta', '1.2beta2', '1.2rc1', '1.2.1', '1.3'] | |
61 | ['1.1', '1.2', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2.1', '1.3'] | |
62 | ||
63 | If you also want '1.2' after all the alpha, beta, and rc candidates, you can | |
64 | modify the above recipe:: | |
65 | ||
66 | >>> versorted(a, key=lambda x: x.replace('.', '~')+'z') | |
67 | ['1.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2', '1.2.1', '1.3'] | |
79 | 68 | |
80 | 69 | Please see `this issue <https://github.com/SethMMorton/natsort/issues/13>`_ to |
81 | 70 | see why this works. |
85 | 74 | |
86 | 75 | In some cases when sorting file paths with OS-Generated names, the default |
87 | 76 | :mod:`~natsorted` algorithm may not be sufficient. In cases like these, |
88 | you may need to use the ``as_path`` option:: | |
77 | you may need to use the ``ns.PATH`` option:: | |
89 | 78 | |
90 | 79 | >>> a = ['./folder/file (1).txt', |
91 | 80 | ... './folder/file.txt', |
93 | 82 | ... './folder (10)/file.txt'] |
94 | 83 | >>> natsorted(a) |
95 | 84 | ['./folder (1)/file.txt', './folder (10)/file.txt', './folder/file (1).txt', './folder/file.txt'] |
96 | >>> natsorted(a, as_path=True) | |
85 | >>> natsorted(a, alg=ns.PATH) | |
97 | 86 | ['./folder/file.txt', './folder/file (1).txt', './folder (1)/file.txt', './folder (10)/file.txt'] |
87 | ||
88 | Locale-Aware Sorting (Human Sorting) | |
89 | ------------------------------------ | |
90 | ||
91 | You can instruct :mod:`natsort` to use locale-aware sorting with the | |
92 | ``ns.LOCALE`` option. In addition to making this understand non-ASCII | |
93 | characters, it will also properly interpret non-'.' decimal separators | |
94 | and also properly order case. It may be more convenient to just use | |
95 | the :func:`humansorted` function:: | |
96 | ||
97 | >>> from natsort import humansorted | |
98 | >>> import locale | |
99 | >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') | |
100 | 'en_US.UTF-8' | |
101 | >>> a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
102 | >>> natsorted(a, alg=ns.LOCALE) | |
103 | ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] | |
104 | >>> humansorted(a) | |
105 | ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] | |
106 | ||
107 | You may find that if you do not explicitly set the locale your results may not | |
108 | be as you expect... I have found that it depends on the system you are on. | |
109 | If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see below) then | |
110 | you should not need to do this. | |
111 | ||
112 | .. _bug_note: | |
113 | ||
114 | A Note For Bugs With Locale-Aware Sorting | |
115 | +++++++++++++++++++++++++++++++++++++++++ | |
116 | ||
117 | If you find that ``ns.LOCALE`` (or :func:`~humansorted`) does not give | |
118 | the results you expect, before filing a bug report please try to first install | |
119 | `PyICU <https://pypi.python.org/pypi/PyICU>`_. There are some known bugs | |
120 | with the `locale` module from the standard library that are solved when | |
121 | using `PyICU <https://pypi.python.org/pypi/PyICU>`_. | |
122 | ||
123 | Controlling Case When Sorting | |
124 | ----------------------------- | |
125 | ||
126 | For non-numbers, by default :mod:`natsort` uses ordinal sorting (i.e. | |
127 | it sorts by the character's value in the ASCII table). For example:: | |
128 | ||
129 | >>> a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
130 | >>> natsorted(a) | |
131 | ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn'] | |
132 | ||
133 | There are times when you wish to ignore the case when sorting, | |
134 | you can easily do this with the ``ns.IGNORECASE`` option:: | |
135 | ||
136 | >>> natsorted(a, alg=ns.IGNORECASE) | |
137 | ['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn'] | |
138 | ||
139 | Note that since Python's sorting is stable, the order of equivalent | |
140 | elements after lowering the case is the same order they appear in the | |
141 | original list. | |
142 | ||
143 | Upper-case letters appear first in the ASCII table, but many natural | |
144 | sorting methods place lower-case first. To do this, use | |
145 | ``ns.LOWERCASEFIRST``:: | |
146 | ||
147 | >>> natsorted(a, alg=ns.LOWERCASEFIRST) | |
148 | ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn'] | |
149 | ||
150 | It may be undesirable to have the upper-case letters grouped together | |
151 | and the lower-case letters grouped together; most would expect all | |
152 | "a"s to be together regardless of case, and all "b"s, and so on. To | |
153 | achieve this, use ``ns.GROUPLETTERS``:: | |
154 | ||
155 | >>> natsorted(a, alg=ns.GROUPLETTERS) | |
156 | ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn'] | |
157 | ||
158 | You might combine this with ``ns.LOWERCASEFIRST`` to get what most | |
159 | would expect to be "natural" sorting:: | |
160 | ||
161 | >>> natsorted(a, alg=ns.G | ns.LF) | |
162 | ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] | |
163 | ||
164 | Customizing Float Definition | |
165 | ---------------------------- | |
166 | ||
167 | By default :func:`~natsorted` searches for any float that would be | |
168 | a valid Python float literal, such as 5, 0.4, -4.78, +4.2E-34, etc. | |
169 | Perhaps you don't want to search for signed numbers, or you don't | |
170 | want to search for exponential notation; the ``ns.UNSIGNED`` and | |
171 | ``ns.NOEXP`` options allow you to do this:: | |
172 | ||
173 | >>> a = ['a50', 'a51.', 'a+50.4', 'a5.034e1', 'a+50.300'] | |
174 | >>> natsorted(a) | |
175 | ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] | |
176 | >>> natsorted(a, alg=ns.UNSIGNED) | |
177 | ['a50', 'a5.034e1', 'a51.', 'a+50.300', 'a+50.4'] | |
178 | >>> natsorted(a, alg=ns.NOEXP) | |
179 | ['a5.034e1', 'a50', 'a+50.300', 'a+50.4', 'a51.'] | |
98 | 180 | |
99 | 181 | Using a Custom Sorting Key |
100 | 182 | -------------------------- |
128 | 210 | >>> a.sort(key=natsort_key) |
129 | 211 | >>> a |
130 | 212 | ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.'] |
131 | >>> versort_key = natsort_keygen(number_type=None) | |
213 | >>> versort_key = natsort_keygen(alg=ns.VERSION) | |
132 | 214 | >>> a = ['ver-2.9.9a', 'ver-1.11', 'ver-2.9.9b', 'ver-1.11.4', 'ver-1.10.1'] |
133 | 215 | >>> a.sort(key=versort_key) |
134 | 216 | >>> a |
135 | 217 | ['ver-1.10.1', 'ver-1.11', 'ver-1.11.4', 'ver-2.9.9a', 'ver-2.9.9b'] |
136 | 218 | |
137 | :func:`~natsort_keygen` has the same API as :func:`~natsorted`. | |
219 | :func:`~natsort_keygen` has the same API as :func:`~natsorted` (minus the | |
220 | `reverse` option). | |
138 | 221 | |
139 | 222 | Sorting Multiple Lists According to a Single List |
140 | 223 | ------------------------------------------------- |
0 | .. default-domain:: py | |
1 | .. currentmodule:: natsort | |
2 | ||
3 | :func:`~natsort.humansorted` | |
4 | ============================ | |
5 | ||
6 | .. autofunction:: humansorted | |
7 |
0 | .. default-domain:: py | |
1 | .. currentmodule:: natsort | |
2 | ||
3 | :func:`~natsort.index_humansorted` | |
4 | ================================== | |
5 | ||
6 | .. autofunction:: index_humansorted | |
7 |
58 | 58 | >>> natsorted(a) # natsorted tries to sort as signed floats, so it won't work |
59 | 59 | ['version-2.0', 'version-1.9', 'version-1.11', 'version-1.10'] |
60 | 60 | |
61 | You can also perform locale-aware sorting (or "human sorting"), where the | |
62 | non-numeric characters are ordered based on their meaning, not on their | |
63 | ordinal value; this can be achieved with the ``humansorted`` function:: | |
64 | ||
65 | >>> a = ['Apple', 'Banana', 'apple', 'banana'] | |
66 | >>> natsorted(a) | |
67 | ['Apple', 'Banana', 'apple', 'banana'] | |
68 | >>> import locale | |
69 | >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') | |
70 | 'en_US.UTF-8' | |
71 | >>> from natsort import humansorted | |
72 | >>> humansorted(a) | |
73 | ['apple', 'Apple', 'banana', 'Banana'] | |
74 | ||
75 | You may find you need to explicitly set the locale to get this to work | |
76 | (as shown in the example). | |
77 | Please see :ref:`bug_note` and the Installation section | |
78 | below before using the ``humansorted`` function. | |
79 | ||
61 | 80 | You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types |
62 | 81 | when you sort:: |
63 | 82 | |
70 | 89 | The natsort algorithm does other fancy things like |
71 | 90 | |
72 | 91 | - recursively descend into lists of lists |
92 | - control the case-sensitivity | |
73 | 93 | - sort file paths correctly |
74 | 94 | - allow custom sorting keys |
75 | - allow exposed a natsort_key generator to pass to list.sort | |
95 | - exposes a natsort_key generator to pass to list.sort | |
76 | 96 | |
77 | 97 | Please see the :ref:`examples` for a quick start guide, or the :ref:`api` |
78 | 98 | for more details. |
118 | 138 | recommended you include this as a dependency. ``natsort`` will not require (or |
119 | 139 | check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed. |
120 | 140 | |
141 | On some systems, Python's ``locale`` library can be buggy (I have found this to be | |
142 | the case on Mac OS X), so ``natsort`` will use | |
143 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ under the hood if it is installed | |
144 | on your computer; this will give more reliable results. ``natsort`` will not | |
145 | require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed | |
146 | at installation. | |
147 | ||
121 | 148 | :mod:`natsort` comes with a shell script called :mod:`natsort`, or can also be called |
122 | 149 | from the command line with ``python -m natsort``. The command line script is |
123 | 150 | only installed onto your ``PATH`` if you don't install via a wheel. There is |
0 | .. default-domain:: py | |
1 | .. currentmodule:: natsort | |
2 | ||
3 | :class:`~natsort.ns` | |
4 | ==================== | |
5 | ||
6 | .. autoclass:: ns | |
7 |
6 | 6 | ============ |
7 | 7 | |
8 | 8 | The ``natsort`` shell script is automatically installed when you install |
9 | :mod:`natsort` from "zip" or "tar.gz" via ``pip`` or ``easy_install`` | |
10 | (there is a known bug with wheels that will not install the shell script). | |
9 | :mod:`natsort` with pip. | |
11 | 10 | |
12 | 11 | Below is the usage and some usage examples for the ``natsort`` shell script. |
13 | 12 | |
16 | 15 | |
17 | 16 | :: |
18 | 17 | |
19 | usage: natsort [-h] [--version] [-p] [-f LOW HIGH] [-F LOW HIGH] | |
20 | [-e EXCLUDE] [-r] [-t {digit,int,float,version,ver}] | |
21 | [--nosign] [--noexp] | |
18 | usage: natsort [-h] [--version] [-p] [-f LOW HIGH] [-F LOW HIGH] [-e EXCLUDE] | |
19 | [-r] [-t {digit,int,float,version,ver}] [--nosign] [--noexp] | |
20 | [--locale] | |
22 | 21 | [entries [entries ...]] |
23 | 22 | |
24 | 23 | Performs a natural sort on entries given on the command-line. |
58 | 57 | --noexp Do not consider an exponential as part of a number, |
59 | 58 | i.e. 1e4, would be considered as 1, "e", and 4, not as |
60 | 59 | 10000. This only effects the --number-type=float. |
61 | ||
60 | --locale, -l Causes natsort to use locale-aware sorting. On some | |
61 | systems, the underlying C library is broken, so if you | |
62 | get results that you do not expect please install | |
63 | PyICU and try again. | |
62 | 64 | Description |
63 | 65 | ----------- |
64 | 66 |
1 | 1 | from __future__ import (print_function, division, |
2 | 2 | unicode_literals, absolute_import) |
3 | 3 | |
4 | from .natsort import (natsort_key, natsort_keygen, natsorted, | |
5 | index_natsorted, versorted, index_versorted, | |
6 | order_by_index) | |
7 | from ._version import __version__ | |
4 | # Local imports. | |
5 | from natsort.natsort import (natsort_key, natsort_keygen, ns, | |
6 | natsorted, humansorted, versorted, | |
7 | index_natsorted, index_versorted, | |
8 | index_humansorted, order_by_index) | |
9 | from natsort._version import __version__ | |
8 | 10 | |
9 | 11 | __all__ = [ |
10 | 12 | 'natsort_key', |
11 | 13 | 'natsort_keygen', |
12 | 14 | 'natsorted', |
13 | 15 | 'versorted' |
16 | 'humansorted', | |
14 | 17 | 'index_natsorted', |
15 | 18 | 'index_versorted', |
19 | 'index_humansorted', | |
16 | 20 | 'order_by_index', |
21 | 'ns', | |
17 | 22 | ] |
1 | 1 | from __future__ import (print_function, division, |
2 | 2 | unicode_literals, absolute_import) |
3 | 3 | |
4 | # Std. lib imports. | |
4 | 5 | import sys |
5 | 6 | |
6 | from .natsort import natsorted, regex_and_num_function_chooser | |
7 | from ._version import __version__ | |
8 | from .py23compat import py23_str | |
7 | # Local imports. | |
8 | from natsort.natsort import natsorted, ns | |
9 | from natsort.utils import _regex_and_num_function_chooser | |
10 | from natsort._version import __version__ | |
11 | from natsort.py23compat import py23_str | |
9 | 12 | |
10 | 13 | |
11 | 14 | def main(): |
61 | 64 | help='Do not consider an exponential as part of a number, i.e. 1e4, ' |
62 | 65 | 'would be considered as 1, "e", and 4, not as 10000. This only ' |
63 | 66 | 'effects the --number-type=float.') |
67 | parser.add_argument( | |
68 | '--locale', '-l', action='store_true', default=False, | |
69 | help='Causes natsort to use locale-aware sorting. On some systems, ' | |
70 | 'the underlying C library is broken, so if you get results that ' | |
71 | 'you do not expect please install PyICU and try again.') | |
64 | 72 | parser.add_argument( |
65 | 73 | 'entries', nargs='*', default=sys.stdin, |
66 | 74 | help='The entries to sort. Taken from stdin if nothing is given on ' |
134 | 142 | """Sort the entries, applying the filters first if necessary.""" |
135 | 143 | |
136 | 144 | # Extract the proper number type. |
137 | kwargs = {'number_type': {'digit': None, | |
138 | 'version': None, | |
139 | 'ver': None, | |
140 | 'int': int, | |
141 | 'float': float}[args.number_type], | |
142 | 'signed': args.signed, | |
143 | 'exp': args.exp, | |
144 | 'as_path': args.paths, | |
145 | 'reverse': args.reverse, } | |
145 | num_type = {'digit': None, | |
146 | 'version': None, | |
147 | 'ver': None, | |
148 | 'int': int, | |
149 | 'float': float}[args.number_type] | |
150 | unsigned = not args.signed or num_type is None | |
151 | alg = (ns.INT * int(num_type in (int, None)) | | |
152 | ns.UNSIGNED * unsigned | | |
153 | ns.NOEXP * (not args.exp) | | |
154 | ns.PATH * args.paths | | |
155 | ns.LOCALE * args.locale) | |
146 | 156 | |
147 | 157 | # Pre-remove entries that don't pass the filtering criteria |
148 | 158 | # Make sure we use the same searching algorithm for filtering |
149 | 159 | # as for sorting. |
150 | 160 | do_filter = args.filter is not None or args.reverse_filter is not None |
151 | 161 | if do_filter or args.exclude: |
152 | inp_options = (kwargs['number_type'], args.signed, args.exp) | |
153 | regex, num_function = regex_and_num_function_chooser[inp_options] | |
162 | inp_options = (ns.INT * int(num_type in (int, None)) | | |
163 | ns.UNSIGNED * unsigned | | |
164 | ns.NOEXP * (not args.exp), | |
165 | '.' | |
166 | ) | |
167 | regex, num_function = _regex_and_num_function_chooser[inp_options] | |
154 | 168 | if args.filter is not None: |
155 | 169 | lows, highs = ([f[0] for f in args.filter], |
156 | 170 | [f[1] for f in args.filter]) |
170 | 184 | num_function, regex)] |
171 | 185 | |
172 | 186 | # Print off the sorted results |
173 | for entry in natsorted(entries, **kwargs): | |
187 | for entry in natsorted(entries, reverse=args.reverse, alg=alg): | |
174 | 188 | print(entry) |
175 | 189 | |
176 | 190 |
1 | 1 | from __future__ import (print_function, division, |
2 | 2 | unicode_literals, absolute_import) |
3 | 3 | |
4 | __version__ = '3.4.1' | |
4 | __version__ = '3.5.1' |
6 | 6 | from __future__ import (print_function, division, |
7 | 7 | unicode_literals, absolute_import) |
8 | 8 | |
9 | # Std. lib imports. | |
9 | 10 | import re |
10 | 11 | |
11 | 12 | float_re = re.compile(r'[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?$') |
0 | # -*- coding: utf-8 -*- | |
1 | """\ | |
2 | This module is intended to help combine some locale functions | |
3 | together for natsort consumption. It also accounts for Python2 | |
4 | and Python3 differences. | |
5 | """ | |
6 | from __future__ import (print_function, division, | |
7 | unicode_literals, absolute_import) | |
8 | ||
9 | # Std. lib imports. | |
10 | import sys | |
11 | from itertools import chain | |
12 | from locale import localeconv | |
13 | ||
14 | # Local imports. | |
15 | from natsort.py23compat import py23_zip | |
16 | ||
17 | # We need cmp_to_key for Python2 because strxfrm is broken for unicode. | |
18 | if sys.version[:3] == '2.7': | |
19 | from functools import cmp_to_key | |
20 | # cmp_to_key was not created till 2.7. | |
21 | elif sys.version[:3] == '2.6': | |
22 | def cmp_to_key(mycmp): | |
23 | """Convert a cmp= function into a key= function""" | |
24 | class K(object): # pragma: no cover | |
25 | __slots__ = ['obj'] | |
26 | ||
27 | def __init__(self, obj): | |
28 | self.obj = obj | |
29 | ||
30 | def __lt__(self, other): | |
31 | return mycmp(self.obj, other.obj) < 0 | |
32 | ||
33 | def __gt__(self, other): | |
34 | return mycmp(self.obj, other.obj) > 0 | |
35 | ||
36 | def __eq__(self, other): | |
37 | return mycmp(self.obj, other.obj) == 0 | |
38 | ||
39 | def __le__(self, other): | |
40 | return mycmp(self.obj, other.obj) <= 0 | |
41 | ||
42 | def __ge__(self, other): | |
43 | return mycmp(self.obj, other.obj) >= 0 | |
44 | ||
45 | def __ne__(self, other): | |
46 | return mycmp(self.obj, other.obj) != 0 | |
47 | ||
48 | def __hash__(self): | |
49 | raise TypeError('hash not implemented') | |
50 | ||
51 | return K | |
52 | ||
53 | # Make the strxfrm function from strcoll on Python2 | |
54 | # It can be buggy, so prefer PyICU if available. | |
55 | try: | |
56 | import PyICU | |
57 | from locale import getlocale | |
58 | ||
59 | # If using PyICU, get the locale from the current global locale, | |
60 | # then create a sort key from that | |
61 | def get_pyicu_transform(l, _d={}): | |
62 | if l not in _d: | |
63 | if l == (None, None): | |
64 | c = PyICU.Collator.createInstance(PyICU.Locale()) | |
65 | else: | |
66 | loc = '.'.join(l) | |
67 | c = PyICU.Collator.createInstance(PyICU.Locale(loc)) | |
68 | _d[l] = c.getSortKey | |
69 | return _d[l] | |
70 | use_pyicu = True | |
71 | except ImportError: | |
72 | if sys.version[0] == '2': | |
73 | from locale import strcoll | |
74 | strxfrm = cmp_to_key(strcoll) | |
75 | else: | |
76 | from locale import strxfrm | |
77 | use_pyicu = False | |
78 | ||
79 | # This little lambda doubles all characters, making letters lowercase. | |
80 | groupletters = lambda x: ''.join(chain(*py23_zip(x.lower(), x))) | |
81 | ||
82 | ||
83 | def grouper(val, func): | |
84 | """\ | |
85 | Attempt to convert a string to a number. If the conversion | |
86 | was not possible, run it through the letter grouper | |
87 | to make the sorting work as requested. | |
88 | """ | |
89 | # Return the number or transformed string. | |
90 | # If the input is identical to the output, then no conversion happened. | |
91 | s = func(val) | |
92 | return groupletters(s) if val is s else s | |
93 | ||
94 | ||
95 | def locale_convert(val, func, group): | |
96 | """\ | |
97 | Attempt to convert a string to a number, first converting | |
98 | the decimal place character if needed. Then, if the conversion | |
99 | was not possible, run it through strxfrm to make the sorting | |
100 | as requested, possibly grouping first. | |
101 | """ | |
102 | ||
103 | # Format the number so that the conversion function can interpret it. | |
104 | radix = localeconv()['decimal_point'] | |
105 | s = val.replace(radix, '.') if radix != '.' else val | |
106 | ||
107 | # Perform the conversion | |
108 | t = func(s) | |
109 | ||
110 | # Return the number or transformed string. | |
111 | # If the input is identical to the output, then no conversion happened. | |
112 | # In this case, we don't want to return the function output because it | |
113 | # may have had characters modified from the above 'replace' call, | |
114 | # so we return the input. | |
115 | if group: | |
116 | if use_pyicu: | |
117 | xfrm = get_pyicu_transform(getlocale()) | |
118 | return xfrm(groupletters(val)) if s is t else t | |
119 | else: | |
120 | return strxfrm(groupletters(val)) if s is t else t | |
121 | else: | |
122 | if use_pyicu: | |
123 | xfrm = get_pyicu_transform(getlocale()) | |
124 | return xfrm(val) if s is t else t | |
125 | else: | |
126 | return strxfrm(val) if s is t else t |
14 | 14 | from __future__ import (print_function, division, |
15 | 15 | unicode_literals, absolute_import) |
16 | 16 | |
17 | import re | |
18 | from os import curdir, pardir | |
19 | from os.path import split, splitext | |
17 | # Std lib. imports. | |
20 | 18 | from operator import itemgetter |
21 | 19 | from functools import partial |
22 | from itertools import islice | |
23 | 20 | from warnings import warn |
24 | 21 | |
25 | # If the user has fastnumbers installed, they will get great speed | |
26 | # benefits. If not, we simulate the functions here. | |
27 | try: | |
28 | from fastnumbers import fast_float, fast_int, isreal | |
29 | except ImportError: | |
30 | from .fake_fastnumbers import fast_float, fast_int, isreal | |
31 | ||
32 | from .py23compat import u_format, py23_str, py23_zip | |
22 | # Local imports. | |
23 | from natsort.utils import _natsort_key, _args_to_enum | |
24 | from natsort.ns_enum import ns | |
25 | from natsort.py23compat import u_format | |
33 | 26 | |
34 | 27 | # Make sure the doctest works for either python2 or python3 |
35 | 28 | __doc__ = u_format(__doc__) |
36 | 29 | |
37 | # The regex that locates floats | |
38 | float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)') | |
39 | float_nosign_exp_re = re.compile(r'(\d*\.?\d+(?:[eE][-+]?\d+)?)') | |
40 | float_sign_noexp_re = re.compile(r'([-+]?\d*\.?\d+)') | |
41 | float_nosign_noexp_re = re.compile(r'(\d*\.?\d+)') | |
42 | # Integer regexes | |
43 | int_nosign_re = re.compile(r'(\d+)') | |
44 | int_sign_re = re.compile(r'([-+]?\d+)') | |
45 | # This dict will help select the correct regex and number conversion function. | |
46 | regex_and_num_function_chooser = { | |
47 | (float, True, True): (float_sign_exp_re, fast_float), | |
48 | (float, True, False): (float_sign_noexp_re, fast_float), | |
49 | (float, False, True): (float_nosign_exp_re, fast_float), | |
50 | (float, False, False): (float_nosign_noexp_re, fast_float), | |
51 | (int, True, True): (int_sign_re, fast_int), | |
52 | (int, True, False): (int_sign_re, fast_int), | |
53 | (int, False, True): (int_nosign_re, fast_int), | |
54 | (int, False, False): (int_nosign_re, fast_int), | |
55 | (None, True, True): (int_nosign_re, fast_int), | |
56 | (None, True, False): (int_nosign_re, fast_int), | |
57 | (None, False, True): (int_nosign_re, fast_int), | |
58 | (None, False, False): (int_nosign_re, fast_int), | |
59 | } | |
60 | ||
61 | ||
62 | def _number_finder(s, regex, numconv, py3_safe): | |
63 | """Helper to split numbers""" | |
64 | ||
65 | # Split the input string by numbers. If there are no splits, return now. | |
66 | # If the input is not a string, TypeError is raised. | |
67 | s = regex.split(s) | |
68 | if len(s) == 1: | |
69 | return tuple(s) | |
70 | ||
71 | # Now convert the numbers to numbers, and leave strings as strings. | |
72 | # Remove empty strings from the list. | |
73 | s = [numconv(x) for x in s if x] | |
74 | ||
75 | # If the list begins with a number, lead with an empty string. | |
76 | # This is used to get around the "unorderable types" issue. | |
77 | if isreal(s[0]): | |
78 | s = [''] + s | |
79 | ||
80 | # The _py3_safe function inserts "" between numbers in the list, | |
81 | # and is used to get around "unorderable types" in complex cases. | |
82 | # It is a separate function that needs to be requested specifically | |
83 | # because it is expensive to call. | |
84 | return _py3_safe(s) if py3_safe else s | |
85 | ||
86 | ||
87 | def _path_splitter(s, _d_match=re.compile(r'\.\d').match): | |
88 | """Split a string into its path components. Assumes a string is a path.""" | |
89 | path_parts = [] | |
90 | p_append = path_parts.append | |
91 | path_location = s | |
92 | ||
93 | # Continue splitting the path from the back until we have reached | |
94 | # '..' or '.', or until there is nothing left to split. | |
95 | while path_location != curdir and path_location != pardir: | |
96 | parent_path = path_location | |
97 | path_location, child_path = split(parent_path) | |
98 | if path_location == parent_path: | |
99 | break | |
100 | p_append(child_path) | |
101 | ||
102 | # This last append is the base path. | |
103 | # Only append if the string is non-empty. | |
104 | if path_location: | |
105 | p_append(path_location) | |
106 | ||
107 | # We created this list in reversed order, so we now correct the order. | |
108 | path_parts.reverse() | |
109 | ||
110 | # Now, split off the file extensions using a similar method to above. | |
111 | # Continue splitting off file extensions until we reach a decimal number | |
112 | # or there are no more extensions. | |
113 | base = path_parts.pop() | |
114 | base_parts = [] | |
115 | b_append = base_parts.append | |
116 | while True: | |
117 | front = base | |
118 | base, ext = splitext(front) | |
119 | if _d_match(ext) or not ext: | |
120 | # Reset base to before the split if the split is invalid. | |
121 | base = front | |
122 | break | |
123 | b_append(ext) | |
124 | b_append(base) | |
125 | base_parts.reverse() | |
126 | ||
127 | # Return the split parent paths and then the split basename. | |
128 | return path_parts + base_parts | |
129 | ||
130 | ||
131 | def _py3_safe(parsed_list): | |
132 | """Insert '' between two numbers.""" | |
133 | length = len(parsed_list) | |
134 | if length < 2: | |
135 | return parsed_list | |
136 | else: | |
137 | new_list = [parsed_list[0]] | |
138 | nl_append = new_list.append | |
139 | for before, after in py23_zip(islice(parsed_list, 0, length-1), | |
140 | islice(parsed_list, 1, None)): | |
141 | if isreal(before) and isreal(after): | |
142 | nl_append("") | |
143 | nl_append(after) | |
144 | return new_list | |
145 | ||
146 | ||
147 | def _natsort_key(val, key=None, number_type=float, signed=True, exp=True, | |
148 | as_path=False, py3_safe=False): | |
149 | """\ | |
150 | Key to sort strings and numbers naturally. | |
151 | ||
152 | It works by separating out the numbers from the strings. This function for | |
153 | internal use only. See the natsort_keygen documentation for details of each | |
154 | parameter. | |
155 | ||
156 | Parameters | |
157 | ---------- | |
158 | val : {str, unicode} | |
159 | key : callable, optional | |
160 | number_type : {None, float, int}, optional | |
161 | signed : {True, False}, optional | |
162 | exp : {True, False}, optional | |
163 | as_path : {True, False}, optional | |
164 | py3_safe : {True, False}, optional | |
165 | ||
166 | Returns | |
167 | ------- | |
168 | out : tuple | |
169 | The modified value with numbers extracted. | |
170 | ||
171 | """ | |
172 | ||
173 | # Convert the arguments to the proper input tuple | |
174 | inp_options = (number_type, signed, exp) | |
175 | try: | |
176 | regex, num_function = regex_and_num_function_chooser[inp_options] | |
177 | except KeyError: | |
178 | # Report errors properly | |
179 | if number_type not in (float, int) and number_type is not None: | |
180 | raise ValueError("_natsort_key: 'number_type' parameter " | |
181 | "'{0}' invalid".format(py23_str(number_type))) | |
182 | elif signed not in (True, False): | |
183 | raise ValueError("_natsort_key: 'signed' parameter " | |
184 | "'{0}' invalid".format(py23_str(signed))) | |
185 | elif exp not in (True, False): | |
186 | raise ValueError("_natsort_key: 'exp' parameter " | |
187 | "'{0}' invalid".format(py23_str(exp))) | |
188 | else: | |
189 | # Apply key if needed. | |
190 | if key is not None: | |
191 | val = key(val) | |
192 | ||
193 | # If this is a path, convert it. | |
194 | # An AttrubuteError is raised if not a string. | |
195 | split_as_path = False | |
196 | if as_path: | |
197 | try: | |
198 | val = _path_splitter(val) | |
199 | except AttributeError: | |
200 | pass | |
201 | else: | |
202 | # Record that this string was split as a path so that | |
203 | # we can set as_path to False in the recursive call. | |
204 | split_as_path = True | |
205 | ||
206 | # Assume the input are strings, which is the most common case. | |
207 | try: | |
208 | return tuple(_number_finder(val, regex, num_function, py3_safe)) | |
209 | except TypeError: | |
210 | # If not strings, assume it is an iterable that must | |
211 | # be parsed recursively. Do not apply the key recursively. | |
212 | # If this string was split as a path, set as_path to False. | |
213 | try: | |
214 | return tuple([_natsort_key(x, None, number_type, signed, | |
215 | exp, as_path and not split_as_path, | |
216 | py3_safe) for x in val]) | |
217 | # If there is still an error, it must be a number. | |
218 | # Return as-is, with a leading empty string. | |
219 | # Waiting for two raised errors instead of calling | |
220 | # isinstance at the opening of the function is slower | |
221 | # for numbers but much faster for strings, and since | |
222 | # numbers are not a common input to natsort this is | |
223 | # an acceptable sacrifice. | |
224 | except TypeError: | |
225 | return (('', val,),) if as_path else ('', val,) | |
226 | ||
227 | ||
228 | @u_format | |
229 | def natsort_key(val, key=None, number_type=float, signed=True, exp=True, | |
230 | as_path=False, py3_safe=False): | |
30 | ||
31 | @u_format | |
32 | def natsort_key(val, key=None, number_type=float, signed=None, exp=None, | |
33 | as_path=None, py3_safe=None, alg=0): | |
231 | 34 | """\ |
232 | 35 | Key to sort strings and numbers naturally. |
233 | 36 | |
256 | 59 | It should accept a single argument and return a single value. |
257 | 60 | |
258 | 61 | number_type : {{None, float, int}}, optional |
259 | The types of number to sort on: `float` searches for floating | |
260 | point numbers, `int` searches for integers, and `None` searches | |
261 | for digits (like integers but does not take into account | |
262 | negative sign). `None` is a shortcut for `number_type = int` | |
263 | and `signed = False`. | |
62 | Deprecated as of version 3.5.0 and will become an undocumented | |
63 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
64 | for all future development. See :class:`ns` class documentation for | |
65 | details. | |
264 | 66 | |
265 | 67 | signed : {{True, False}}, optional |
266 | By default a '+' or '-' before a number is taken to be the sign | |
267 | of the number. If `signed` is `False`, any '+' or '-' will not | |
268 | be considered to be part of the number, but as part part of the | |
269 | string. | |
68 | Deprecated as of version 3.5.0 and will become an undocumented | |
69 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
70 | for all future development. See :class:`ns` class documentation for | |
71 | details. | |
270 | 72 | |
271 | 73 | exp : {{True, False}}, optional |
272 | This option only applies to `number_type = float`. If | |
273 | `exp = True`, a string like "3.5e5" will be interpreted as | |
274 | 350000, i.e. the exponential part is considered to be part of | |
275 | the number. If `exp = False`, "3.5e5" is interpreted as | |
276 | ``(3.5, "e", 5)``. The default behavior is `exp = True`. | |
74 | Deprecated as of version 3.5.0 and will become an undocumented | |
75 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
76 | for all future development. See :class:`ns` class documentation for | |
77 | details. | |
277 | 78 | |
278 | 79 | as_path : {{True, False}}, optional |
279 | This option will force strings to be interpreted as filesystem | |
280 | paths, so they will be split according to the filesystem separator | |
281 | (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the | |
282 | file extension, if any. Without this, lists of file paths like | |
283 | ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted | |
284 | properly; ``'Folder'`` will be placed at the end, not at the front. | |
285 | The default behavior is `as_path = False`. | |
80 | Deprecated as of version 3.5.0 and will become an undocumented | |
81 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
82 | for all future development. See :class:`ns` class documentation for | |
83 | details. | |
286 | 84 | |
287 | 85 | py3_safe : {{True, False}}, optional |
288 | This will make the string parsing algorithm be more careful by | |
289 | placing an empty string between two adjacent numbers after the | |
290 | parsing algorithm. This will prevent the "unorderable types" | |
291 | error. | |
86 | Deprecated as of version 3.5.0 and will become an undocumented | |
87 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
88 | for all future development. See :class:`ns` class documentation for | |
89 | details. | |
90 | ||
91 | alg : ns enum, optional | |
92 | This option is used to control which algorithm `natsort` | |
93 | uses when sorting. For details into these options, please see | |
94 | the :class:`ns` class documentation. The default is `ns.FLOAT`. | |
292 | 95 | |
293 | 96 | Returns |
294 | 97 | ------- |
347 | 150 | """ |
348 | 151 | msg = "natsort_key is depreciated as of 3.4.0, please use natsort_keygen" |
349 | 152 | warn(msg, DeprecationWarning) |
350 | return _natsort_key(val, key, number_type, signed, exp, as_path, py3_safe) | |
351 | ||
352 | ||
353 | @u_format | |
354 | def natsort_keygen(key=None, number_type=float, signed=True, exp=True, | |
355 | as_path=False, py3_safe=False): | |
153 | alg = _args_to_enum(number_type, signed, exp, as_path, py3_safe) | alg | |
154 | return _natsort_key(val, key, alg) | |
155 | ||
156 | ||
157 | @u_format | |
158 | def natsort_keygen(key=None, number_type=float, signed=None, exp=None, | |
159 | as_path=None, py3_safe=None, alg=0): | |
356 | 160 | """\ |
357 | 161 | Generate a key to sort strings and numbers naturally. |
358 | 162 | |
372 | 176 | It should accept a single argument and return a single value. |
373 | 177 | |
374 | 178 | number_type : {{None, float, int}}, optional |
375 | The types of number to sort on: `float` searches for floating | |
376 | point numbers, `int` searches for integers, and `None` searches | |
377 | for digits (like integers but does not take into account | |
378 | negative sign). `None` is a shortcut for `number_type = int` | |
379 | and `signed = False`. | |
179 | Deprecated as of version 3.5.0 and will become an undocumented | |
180 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
181 | for all future development. See :class:`ns` class documentation for | |
182 | details. | |
380 | 183 | |
381 | 184 | signed : {{True, False}}, optional |
382 | By default a '+' or '-' before a number is taken to be the sign | |
383 | of the number. If `signed` is `False`, any '+' or '-' will not | |
384 | be considered to be part of the number, but as part part of the | |
385 | string. | |
185 | Deprecated as of version 3.5.0 and will become an undocumented | |
186 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
187 | for all future development. See :class:`ns` class documentation for | |
188 | details. | |
386 | 189 | |
387 | 190 | exp : {{True, False}}, optional |
388 | This option only applies to `number_type = float`. If | |
389 | `exp = True`, a string like "3.5e5" will be interpreted as | |
390 | 350000, i.e. the exponential part is considered to be part of | |
391 | the number. If `exp = False`, "3.5e5" is interpreted as | |
392 | ``(3.5, "e", 5)``. The default behavior is `exp = True`. | |
191 | Deprecated as of version 3.5.0 and will become an undocumented | |
192 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
193 | for all future development. See :class:`ns` class documentation for | |
194 | details. | |
393 | 195 | |
394 | 196 | as_path : {{True, False}}, optional |
395 | This option will force strings to be interpreted as filesystem | |
396 | paths, so they will be split according to the filesystem separator | |
397 | (i.e. `/` on UNIX, `\\\\` on Windows), as well as splitting on the | |
398 | file extension, if any. Without this, lists with file paths like | |
399 | ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be sorted | |
400 | properly; ``'Folder'`` will be placed at the end, not at the front. | |
401 | The default behavior is `as_path = False`. | |
197 | Deprecated as of version 3.5.0 and will become an undocumented | |
198 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
199 | for all future development. See :class:`ns` class documentation for | |
200 | details. | |
402 | 201 | |
403 | 202 | py3_safe : {{True, False}}, optional |
404 | This will make the string parsing algorithm be more careful by | |
405 | placing an empty string between two adjacent numbers after the | |
406 | parsing algorithm. This will prevent the "unorderable types" | |
407 | error. | |
203 | Deprecated as of version 3.5.0 and will become an undocumented | |
204 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
205 | for all future development. See :class:`ns` class documentation for | |
206 | details. | |
207 | ||
208 | alg : ns enum, optional | |
209 | This option is used to control which algorithm `natsort` | |
210 | uses when sorting. For details into these options, please see | |
211 | the :class:`ns` class documentation. The default is `ns.FLOAT`. | |
408 | 212 | |
409 | 213 | Returns |
410 | 214 | ------- |
439 | 243 | True |
440 | 244 | |
441 | 245 | """ |
442 | return partial(_natsort_key, | |
443 | key=key, | |
444 | number_type=number_type, | |
445 | signed=signed, | |
446 | exp=exp, | |
447 | as_path=as_path, | |
448 | py3_safe=py3_safe) | |
449 | ||
450 | ||
451 | @u_format | |
452 | def natsorted(seq, key=None, number_type=float, signed=True, exp=True, | |
453 | reverse=False, as_path=False): | |
246 | alg = _args_to_enum(number_type, signed, exp, as_path, py3_safe) | alg | |
247 | return partial(_natsort_key, key=key, alg=alg) | |
248 | ||
249 | ||
250 | @u_format | |
251 | def natsorted(seq, key=None, number_type=float, signed=None, exp=None, | |
252 | reverse=False, as_path=None, alg=0): | |
454 | 253 | """\ |
455 | 254 | Sorts a sequence naturally. |
456 | 255 | |
469 | 268 | It should accept a single argument and return a single value. |
470 | 269 | |
471 | 270 | number_type : {{None, float, int}}, optional |
472 | The types of number to sort on: `float` searches for floating | |
473 | point numbers, `int` searches for integers, and `None` searches | |
474 | for digits (like integers but does not take into account | |
475 | negative sign). `None` is a shortcut for `number_type = int` | |
476 | and `signed = False`. | |
271 | Deprecated as of version 3.5.0 and will become an undocumented | |
272 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
273 | for all future development. See :class:`ns` class documentation for | |
274 | details. | |
477 | 275 | |
478 | 276 | signed : {{True, False}}, optional |
479 | By default a '+' or '-' before a number is taken to be the sign | |
480 | of the number. If `signed` is `False`, any '+' or '-' will not | |
481 | be considered to be part of the number, but as part part of the | |
482 | string. | |
277 | Deprecated as of version 3.5.0 and will become an undocumented | |
278 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
279 | for all future development. See :class:`ns` class documentation for | |
280 | details. | |
483 | 281 | |
484 | 282 | exp : {{True, False}}, optional |
485 | This option only applies to `number_type = float`. If | |
486 | `exp = True`, a string like "3.5e5" will be interpreted as | |
487 | 350000, i.e. the exponential part is considered to be part of | |
488 | the number. If `exp = False`, "3.5e5" is interpreted as | |
489 | ``(3.5, "e", 5)``. The default behavior is `exp = True`. | |
283 | Deprecated as of version 3.5.0 and will become an undocumented | |
284 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
285 | for all future development. See :class:`ns` class documentation for | |
286 | details. | |
490 | 287 | |
491 | 288 | reverse : {{True, False}}, optional |
492 | 289 | Return the list in reversed sorted order. The default is |
493 | 290 | `False`. |
494 | 291 | |
495 | 292 | as_path : {{True, False}}, optional |
496 | This option will force strings to be interpreted as filesystem | |
497 | paths, so they will be split according to the filesystem separator | |
498 | (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the | |
499 | file extension, if any. Without this, lists of file paths like | |
500 | ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted | |
501 | properly; ``'Folder'`` will be placed at the end, not at the front. | |
502 | The default behavior is `as_path = False`. | |
293 | Deprecated as of version 3.5.0 and will become an undocumented | |
294 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
295 | for all future development. See :class:`ns` class documentation for | |
296 | details. | |
297 | ||
298 | alg : ns enum, optional | |
299 | This option is used to control which algorithm `natsort` | |
300 | uses when sorting. For details into these options, please see | |
301 | the :class:`ns` class documentation. The default is `ns.FLOAT`. | |
503 | 302 | |
504 | 303 | Returns |
505 | 304 | ------- |
509 | 308 | See Also |
510 | 309 | -------- |
511 | 310 | natsort_keygen : Generates the key that makes natural sorting possible. |
512 | versorted : A wrapper for ``natsorted(seq, number_type=None)``. | |
311 | versorted : A wrapper for ``natsorted(seq, alg=ns.VERSION)``. | |
312 | humansorted : A wrapper for ``natsorted(seq, alg=ns.LOCALE)``. | |
513 | 313 | index_natsorted : Returns the sorted indexes from `natsorted`. |
514 | 314 | |
515 | 315 | Examples |
521 | 321 | [{u}'num2', {u}'num3', {u}'num5'] |
522 | 322 | |
523 | 323 | """ |
324 | alg = _args_to_enum(number_type, signed, exp, as_path, None) | alg | |
524 | 325 | try: |
525 | 326 | return sorted(seq, reverse=reverse, |
526 | key=natsort_keygen(key, number_type, | |
527 | signed, exp, as_path)) | |
528 | except TypeError as e: | |
327 | key=natsort_keygen(key, alg=alg)) | |
328 | except TypeError as e: # pragma: no cover | |
529 | 329 | # In the event of an unresolved "unorderable types" error |
530 | 330 | # attempt to sort again, being careful to prevent this error. |
531 | 331 | if 'unorderable types' in str(e): |
532 | 332 | return sorted(seq, reverse=reverse, |
533 | key=natsort_keygen(key, number_type, | |
534 | signed, exp, as_path, | |
535 | True)) | |
333 | key=natsort_keygen(key, | |
334 | alg=alg | ns.TYPESAFE)) | |
536 | 335 | else: |
537 | 336 | # Re-raise if the problem was not "unorderable types" |
538 | 337 | raise |
539 | 338 | |
540 | 339 | |
541 | 340 | @u_format |
542 | def versorted(seq, key=None, reverse=False, as_path=False): | |
341 | def versorted(seq, key=None, reverse=False, as_path=None, alg=0): | |
543 | 342 | """\ |
544 | 343 | Convenience function to sort version numbers. |
545 | 344 | |
546 | 345 | Convenience function to sort version numbers. This is a wrapper |
547 | around ``natsorted(seq, number_type=None)``. | |
346 | around ``natsorted(seq, alg=ns.VERSION)``. | |
548 | 347 | |
549 | 348 | Parameters |
550 | 349 | ---------- |
561 | 360 | `False`. |
562 | 361 | |
563 | 362 | as_path : {{True, False}}, optional |
564 | This option will force strings to be interpreted as filesystem | |
565 | paths, so they will be split according to the filesystem separator | |
566 | (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the | |
567 | file extension, if any. Without this, lists of file paths like | |
568 | ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted | |
569 | properly; ``'Folder'`` will be placed at the end, not at the front. | |
570 | The default behavior is `as_path = False`. | |
363 | Deprecated as of version 3.5.0 and will become an undocumented | |
364 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
365 | for all future development. See :class:`ns` class documentation for | |
366 | details. | |
367 | ||
368 | alg : ns enum, optional | |
369 | This option is used to control which algorithm `natsort` | |
370 | uses when sorting. For details into these options, please see | |
371 | the :class:`ns` class documentation. The default is `ns.FLOAT`. | |
571 | 372 | |
572 | 373 | Returns |
573 | 374 | ------- |
587 | 388 | [{u}'num3.4.1', {u}'num3.4.2', {u}'num4.0.2'] |
588 | 389 | |
589 | 390 | """ |
590 | return natsorted(seq, key, None, reverse=reverse, as_path=as_path) | |
591 | ||
592 | ||
593 | @u_format | |
594 | def index_natsorted(seq, key=None, number_type=float, signed=True, exp=True, | |
595 | reverse=False, as_path=False): | |
391 | alg = _args_to_enum(float, None, None, as_path, None) | alg | |
392 | return natsorted(seq, key, reverse=reverse, alg=alg | ns.VERSION) | |
393 | ||
394 | ||
395 | @u_format | |
396 | def humansorted(seq, key=None, reverse=False, alg=0): | |
397 | """\ | |
398 | Convenience function to properly sort non-numeric characters. | |
399 | ||
400 | Convenience function to properly sort non-numeric characters | |
401 | in a locale-aware fashion (a.k.a "human sorting"). This is a | |
402 | wrapper around ``natsorted(seq, alg=ns.LOCALE)``. | |
403 | ||
404 | .. warning:: On some systems, the underlying C library that | |
405 | Python's locale module uses is broken. On these | |
406 | systems it is recommended that you install | |
407 | `PyICU <https://pypi.python.org/pypi/PyICU>`_. | |
408 | Please validate that this function works as | |
409 | expected on your target system, and if not you | |
410 | should add | |
411 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
412 | as a dependency. | |
413 | ||
414 | Parameters | |
415 | ---------- | |
416 | seq : iterable | |
417 | The sequence to sort. | |
418 | ||
419 | key : callable, optional | |
420 | A key used to determine how to sort each element of the sequence. | |
421 | It is **not** applied recursively. | |
422 | It should accept a single argument and return a single value. | |
423 | ||
424 | reverse : {{True, False}}, optional | |
425 | Return the list in reversed sorted order. The default is | |
426 | `False`. | |
427 | ||
428 | alg : ns enum, optional | |
429 | This option is used to control which algorithm `natsort` | |
430 | uses when sorting. For details into these options, please see | |
431 | the :class:`ns` class documentation. The default is `ns.FLOAT`. | |
432 | ||
433 | Returns | |
434 | ------- | |
435 | out : list | |
436 | The sorted sequence. | |
437 | ||
438 | See Also | |
439 | -------- | |
440 | index_humansorted : Returns the sorted indexes from `humansorted`. | |
441 | ||
442 | Notes | |
443 | ----- | |
444 | You may find that if you do not explicitly set | |
445 | the locale your results may not be as you expect... I have found that | |
446 | it depends on the system you are on. To do this is straightforward | |
447 | (in the below example I use 'en_US.UTF-8', but you should use your | |
448 | locale):: | |
449 | ||
450 | >>> import locale | |
451 | >>> # The 'str' call is only to get around a bug on Python 2.x | |
452 | >>> # where 'setlocale' does not expect unicode strings (ironic, | |
453 | >>> # right?) | |
454 | >>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) | |
455 | 'en_US.UTF-8' | |
456 | ||
457 | It is preferred that you do this before importing `natsort`. | |
458 | If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning | |
459 | above) then you should not need to do this. | |
460 | ||
461 | Examples | |
462 | -------- | |
463 | Use `humansorted` just like the builtin `sorted`:: | |
464 | ||
465 | >>> a = ['Apple', 'Banana', 'apple', 'banana'] | |
466 | >>> natsorted(a) | |
467 | [{u}'Apple', {u}'Banana', {u}'apple', {u}'banana'] | |
468 | >>> humansorted(a) | |
469 | [{u}'apple', {u}'Apple', {u}'banana', {u}'Banana'] | |
470 | ||
471 | """ | |
472 | return natsorted(seq, key, reverse=reverse, alg=alg | ns.LOCALE) | |
473 | ||
474 | ||
475 | @u_format | |
476 | def index_natsorted(seq, key=None, number_type=float, signed=None, exp=None, | |
477 | reverse=False, as_path=None, alg=0): | |
596 | 478 | """\ |
597 | 479 | Return the list of the indexes used to sort the input sequence. |
598 | 480 | |
612 | 494 | It should accept a single argument and return a single value. |
613 | 495 | |
614 | 496 | number_type : {{None, float, int}}, optional |
615 | The types of number to sort on: `float` searches for floating | |
616 | point numbers, `int` searches for integers, and `None` searches | |
617 | for digits (like integers but does not take into account | |
618 | negative sign). `None` is a shortcut for `number_type = int` | |
619 | and `signed = False`. | |
497 | Deprecated as of version 3.5.0 and will become an undocumented | |
498 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
499 | for all future development. See :class:`ns` class documentation for | |
500 | details. | |
620 | 501 | |
621 | 502 | signed : {{True, False}}, optional |
622 | By default a '+' or '-' before a number is taken to be the sign | |
623 | of the number. If `signed` is `False`, any '+' or '-' will not | |
624 | be considered to be part of the number, but as part part of the | |
625 | string. | |
503 | Deprecated as of version 3.5.0 and will become an undocumented | |
504 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
505 | for all future development. See :class:`ns` class documentation for | |
506 | details. | |
626 | 507 | |
627 | 508 | exp : {{True, False}}, optional |
628 | This option only applies to `number_type = float`. If | |
629 | `exp = True`, a string like "3.5e5" will be interpreted as | |
630 | 350000, i.e. the exponential part is considered to be part of | |
631 | the number. If `exp = False`, "3.5e5" is interpreted as | |
632 | ``(3.5, "e", 5)``. The default behavior is `exp = True`. | |
509 | Deprecated as of version 3.5.0 and will become an undocumented | |
510 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
511 | for all future development. See :class:`ns` class documentation for | |
512 | details. | |
633 | 513 | |
634 | 514 | reverse : {{True, False}}, optional |
635 | 515 | Return the list in reversed sorted order. The default is |
636 | 516 | `False`. |
637 | 517 | |
638 | 518 | as_path : {{True, False}}, optional |
639 | This option will force strings to be interpreted as filesystem | |
640 | paths, so they will be split according to the filesystem separator | |
641 | (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the | |
642 | file extension, if any. Without this, lists of file paths like | |
643 | ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted | |
644 | properly; ``'Folder'`` will be placed at the end, not at the front. | |
645 | The default behavior is `as_path = False`. | |
519 | Deprecated as of version 3.5.0 and will become an undocumented | |
520 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
521 | for all future development. See :class:`ns` class documentation for | |
522 | details. | |
523 | ||
524 | alg : ns enum, optional | |
525 | This option is used to control which algorithm `natsort` | |
526 | uses when sorting. For details into these options, please see | |
527 | the :class:`ns` class documentation. The default is `ns.FLOAT`. | |
646 | 528 | |
647 | 529 | Returns |
648 | 530 | ------- |
672 | 554 | [{u}'baz', {u}'foo', {u}'bar'] |
673 | 555 | |
674 | 556 | """ |
557 | alg = _args_to_enum(number_type, signed, exp, as_path, None) | alg | |
675 | 558 | if key is None: |
676 | 559 | newkey = itemgetter(1) |
677 | 560 | else: |
680 | 563 | index_seq_pair = [[x, y] for x, y in enumerate(seq)] |
681 | 564 | try: |
682 | 565 | index_seq_pair.sort(reverse=reverse, |
683 | key=natsort_keygen(newkey, number_type, | |
684 | signed, exp, as_path)) | |
685 | except TypeError as e: | |
566 | key=natsort_keygen(newkey, alg=alg)) | |
567 | except TypeError as e: # pragma: no cover | |
686 | 568 | # In the event of an unresolved "unorderable types" error |
687 | 569 | # attempt to sort again, being careful to prevent this error. |
688 | 570 | if 'unorderable types' in str(e): |
689 | 571 | index_seq_pair.sort(reverse=reverse, |
690 | key=natsort_keygen(newkey, number_type, | |
691 | signed, exp, as_path, | |
692 | True)) | |
572 | key=natsort_keygen(newkey, | |
573 | alg=alg | ns.TYPESAFE)) | |
693 | 574 | else: |
694 | 575 | # Re-raise if the problem was not "unorderable types" |
695 | 576 | raise |
697 | 578 | |
698 | 579 | |
699 | 580 | @u_format |
700 | def index_versorted(seq, key=None, reverse=False, as_path=False): | |
581 | def index_versorted(seq, key=None, reverse=False, as_path=None, alg=0): | |
701 | 582 | """\ |
702 | 583 | Return the list of the indexes used to sort the input sequence |
703 | 584 | of version numbers. |
704 | 585 | |
705 | Sorts a sequence naturally, but returns a list of sorted the | |
586 | Sorts a sequence of versions, but returns a list of the sorted | |
706 | 587 | indexes and not the sorted list. This list of indexes can be |
707 | 588 | used to sort multiple lists by the sorted order of the given |
708 | 589 | sequence. |
724 | 605 | `False`. |
725 | 606 | |
726 | 607 | as_path : {{True, False}}, optional |
727 | This option will force strings to be interpreted as filesystem | |
728 | paths, so they will be split according to the filesystem separator | |
729 | (i.e. '/' on UNIX, '\\\\' on Windows), as well as splitting on the | |
730 | file extension, if any. Without this, lists of file paths like | |
731 | ``['Folder', 'Folder (1)', 'Folder (10)']`` will not be sorted | |
732 | properly; ``'Folder'`` will be placed at the end, not at the front. | |
733 | The default behavior is `as_path = False`. | |
608 | Deprecated as of version 3.5.0 and will become an undocumented | |
609 | keyword-only argument in 4.0.0. Please use the `alg` argument | |
610 | for all future development. See :class:`ns` class documentation for | |
611 | details. | |
612 | ||
613 | alg : ns enum, optional | |
614 | This option is used to control which algorithm `natsort` | |
615 | uses when sorting. For details into these options, please see | |
616 | the :class:`ns` class documentation. The default is `ns.FLOAT`. | |
734 | 617 | |
735 | 618 | Returns |
736 | 619 | ------- |
751 | 634 | [1, 2, 0] |
752 | 635 | |
753 | 636 | """ |
754 | return index_natsorted(seq, key, None, reverse=reverse, as_path=as_path) | |
637 | alg = _args_to_enum(float, None, None, as_path, None) | alg | |
638 | return index_natsorted(seq, key, reverse=reverse, alg=alg | ns.VERSION) | |
639 | ||
640 | ||
641 | @u_format | |
642 | def index_humansorted(seq, key=None, reverse=False, alg=0): | |
643 | """\ | |
644 | Return the list of the indexes used to sort the input sequence | |
645 | in a locale-aware manner. | |
646 | ||
647 | Sorts a sequence in a locale-aware manner, but returns a list | |
648 | of the sorted indexes and not the sorted list. This list of | |
649 | indexes can be used to sort multiple lists by the sorted order | |
650 | of the given sequence. | |
651 | ||
652 | This is a wrapper around ``index_natsorted(seq, alg=ns.LOCALE)``. | |
653 | ||
654 | Parameters | |
655 | ---------- | |
656 | seq: iterable | |
657 | The sequence to sort. | |
658 | ||
659 | key: callable, optional | |
660 | A key used to determine how to sort each element of the sequence. | |
661 | It is **not** applied recursively. | |
662 | It should accept a single argument and return a single value. | |
663 | ||
664 | reverse : {{True, False}}, optional | |
665 | Return the list in reversed sorted order. The default is | |
666 | `False`. | |
667 | ||
668 | alg : ns enum, optional | |
669 | This option is used to control which algorithm `natsort` | |
670 | uses when sorting. For details into these options, please see | |
671 | the :class:`ns` class documentation. The default is `ns.FLOAT`. | |
672 | ||
673 | Returns | |
674 | ------- | |
675 | out : list | |
676 | The ordered indexes of the sequence. | |
677 | ||
678 | See Also | |
679 | -------- | |
680 | humansorted | |
681 | order_by_index | |
682 | ||
683 | Notes | |
684 | ----- | |
685 | You may find that if you do not explicitly set | |
686 | the locale your results may not be as you expect... I have found that | |
687 | it depends on the system you are on. To do this is straightforward | |
688 | (in the below example I use 'en_US.UTF-8', but you should use your | |
689 | locale):: | |
690 | ||
691 | >>> import locale | |
692 | >>> # The 'str' call is only to get around a bug on Python 2.x | |
693 | >>> # where 'setlocale' does not expect unicode strings (ironic, | |
694 | >>> # right?) | |
695 | >>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) | |
696 | 'en_US.UTF-8' | |
697 | ||
698 | It is preferred that you do this before importing `natsort`. | |
699 | If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning | |
700 | above) then you should not need to do this. | |
701 | ||
702 | Examples | |
703 | -------- | |
704 | Use `index_humansorted` just like the builtin `sorted`:: | |
705 | ||
706 | >>> a = ['Apple', 'Banana', 'apple', 'banana'] | |
707 | >>> index_humansorted(a) | |
708 | [2, 0, 3, 1] | |
709 | ||
710 | """ | |
711 | return index_natsorted(seq, key, reverse=reverse, alg=alg | ns.LOCALE) | |
755 | 712 | |
756 | 713 | |
757 | 714 | @u_format |
791 | 748 | -------- |
792 | 749 | index_natsorted |
793 | 750 | index_versorted |
751 | index_humansorted | |
794 | 752 | |
795 | 753 | Examples |
796 | 754 | -------- |
0 | # -*- coding: utf-8 -*- | |
1 | """This module defines the "ns" enum for natsort.""" | |
2 | ||
3 | from __future__ import (print_function, division, | |
4 | unicode_literals, absolute_import) | |
5 | ||
6 | ||
7 | class ns(object): | |
8 | """ | |
9 | Enum to control the `natsort` algorithm. | |
10 | ||
11 | This class acts like an enum to control the `natsort` algorithm. The | |
12 | user may select several options simultaneously by or'ing the options | |
13 | together. For example, to choose ``ns.INT``, ``ns.PATH``, and | |
14 | ``ns.LOCALE``, you could do ``ns.INT | ns.LOCALE | ns.PATH``. | |
15 | ||
16 | Each option has a shortened 1- or 2-letter form. | |
17 | ||
18 | .. warning:: On some systems, the underlying C library that | |
19 | Python's locale module uses is broken. On these | |
20 | systems it is recommended that you install | |
21 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
22 | if you wish to use `LOCALE`. | |
23 | Please validate that `LOCALE` works as | |
24 | expected on your target system, and if not you | |
25 | should add | |
26 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
27 | as a dependency. | |
28 | ||
29 | Attributes | |
30 | ---------- | |
31 | FLOAT, F | |
32 | The default - parse numbers as floats. | |
33 | INT, I | |
34 | Tell `natsort` to parse numbers as ints. | |
35 | UNSIGNED, U | |
36 | Tell `natsort` to ignore any sign (i.e. "-" or "+") to the | |
37 | immediate left of a number. It is the same as setting the old | |
38 | `signed` option to `False`. | |
39 | VERSION, V | |
40 | This is a shortcut for ``ns.INT | ns.UNSIGNED``, which is useful | |
41 | when attempting to sort version numbers. It is the same as | |
42 | setting the old `number_type` option to `None`. | |
43 | DIGIT, D | |
44 | Same as `VERSION` above. | |
45 | NOEXP, N | |
46 | Tell `natsort` to not search for exponents as part of the number. | |
47 | For example, with `NOEXP` the number "5.6E5" would be interpreted | |
48 | as `5.6`, `"E"`, and `5`. It is the same as setting the old `exp` | |
49 | option to `False`. | |
50 | PATH, P | |
51 | Tell `natsort` to interpret strings as filesystem paths, so they | |
52 | will be split according to the filesystem separator | |
53 | (i.e. ‘/’ on UNIX, ‘\’ on Windows), as well as splitting on the | |
54 | file extension, if any. Without this, lists of file paths like | |
55 | ``['Folder/', 'Folder (1)/', 'Folder (10)/']`` will not be sorted | |
56 | properly; 'Folder/' will be placed at the end, not at the front. | |
57 | It is the same as setting the old `as_path` option to `True`. | |
58 | LOCALE, L | |
59 | Tell `natsort` to be locale-aware when sorting strings (everything | |
60 | that was not converted to a number). Your sorting results will vary | |
61 | depending on your current locale. Generally, the `GROUPLETTERS` | |
62 | option is not needed with `LOCALE` because the `locale` library | |
63 | groups the letters in the same manner (although you may still | |
64 | need `GROUPLETTERS` if there are numbers in your strings). | |
65 | IGNORECASE, IC | |
66 | Tell `natsort` to ignore case when sorting. For example, | |
67 | ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as | |
68 | ``['apple', 'Apple', 'Banana', 'banana']``. | |
69 | LOWERCASEFIRST, LF | |
70 | Tell `natsort` to put lowercase letters before uppercase letters | |
71 | when sorting. For example, | |
72 | ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as | |
73 | ``['apple', 'banana', 'Apple', 'Banana']`` (the default order | |
74 | would be ``['Apple', 'Banana', 'apple', 'banana']`` which is | |
75 | the order from a purely ordinal sort). | |
76 | Useless when used with `IGNORECASE`. | |
77 | GROUPLETTERS, G | |
78 | Tell `natsort` to group lowercase and uppercase letters together | |
79 | when sorting. For example, | |
80 | ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as | |
81 | ``['Apple', 'apple', 'Banana', 'banana']``. | |
82 | Useless when used with `IGNORECASE`; use with `LOWERCASEFIRST` | |
83 | to reverse the order of upper and lower case. | |
84 | TYPESAFE, T | |
85 | Try hard to avoid "unorderable types" error on Python 3. It | |
86 | is the same as setting the old `py3_safe` option to `True`. | |
87 | ||
88 | Notes | |
89 | ----- | |
90 | If using `LOCALE`, you may find that if you do not explicitly set | |
91 | the locale your results may not be as you expect... I have found that | |
92 | it depends on the system you are on. To do this is straightforward | |
93 | (in the below example I use 'en_US.UTF-8', but you should use your | |
94 | locale):: | |
95 | ||
96 | >>> import locale | |
97 | >>> # The 'str' call is only to get around a bug on Python 2.x | |
98 | >>> # where 'setlocale' does not expect unicode strings (ironic, | |
99 | >>> # right?) | |
100 | >>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) | |
101 | 'en_US.UTF-8' | |
102 | ||
103 | It is preferred that you do this before importing `natsort`. | |
104 | If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning | |
105 | above) then you should not need to do this. | |
106 | ||
107 | """ | |
108 | pass | |
109 | ||
110 | ||
111 | # Sort algorithm "enum" values. | |
112 | _nsdict = {'FLOAT': 0, 'F': 0, | |
113 | 'INT': 1, 'I': 1, | |
114 | 'UNSIGNED': 2, 'U': 2, | |
115 | 'VERSION': 3, 'V': 3, # Shortcut for INT | UNSIGNED | |
116 | 'DIGIT': 3, 'D': 3, # Shortcut for INT | UNSIGNED | |
117 | 'NOEXP': 4, 'N': 4, | |
118 | 'PATH': 8, 'P': 8, | |
119 | 'LOCALE': 16, 'L': 16, | |
120 | 'IGNORECASE': 32, 'IC': 32, | |
121 | 'LOWERCASEFIRST': 64, 'LF': 64, | |
122 | 'GROUPLETTERS': 128, 'G': 128, | |
123 | 'TYPESAFE': 1024, 'T': 1024, | |
124 | } | |
125 | # Populate the ns class with the _nsdict values. | |
126 | for x, y in _nsdict.items(): | |
127 | setattr(ns, x, y) |
0 | # -*- coding: utf-8 -*- | |
1 | """ | |
2 | Utilities and definitions for natsort, mostly all used to define | |
3 | the _natsort_key function. | |
4 | ||
5 | """ | |
6 | ||
7 | from __future__ import (print_function, division, | |
8 | unicode_literals, absolute_import) | |
9 | ||
10 | # Std. lib imports. | |
11 | import re | |
12 | from warnings import warn | |
13 | from os import curdir, pardir | |
14 | from os.path import split, splitext | |
15 | from itertools import islice | |
16 | from locale import localeconv | |
17 | ||
18 | # Local imports. | |
19 | from natsort.locale_help import locale_convert, grouper | |
20 | from natsort.py23compat import py23_str, py23_zip | |
21 | from natsort.ns_enum import ns, _nsdict | |
22 | ||
23 | # If the user has fastnumbers installed, they will get great speed | |
24 | # benefits. If not, we simulate the functions here. | |
25 | try: | |
26 | from fastnumbers import fast_float, fast_int, isreal | |
27 | except ImportError: | |
28 | from natsort.fake_fastnumbers import fast_float, fast_int, isreal | |
29 | ||
30 | # Group algorithm types for easy extraction | |
31 | _NUMBER_ALGORITHMS = ns.FLOAT | ns.INT | ns.UNSIGNED | ns.NOEXP | |
32 | _ALL_BUT_PATH = (ns.F | ns.I | ns.U | ns.N | ns.L | | |
33 | ns.IC | ns.LF | ns.G | ns.TYPESAFE) | |
34 | ||
35 | # The regex that locates floats | |
36 | _float_sign_exp_re = re.compile(r'([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U) | |
37 | _float_nosign_exp_re = re.compile(r'(\d*\.?\d+(?:[eE][-+]?\d+)?)', re.U) | |
38 | _float_sign_noexp_re = re.compile(r'([-+]?\d*\.?\d+)', re.U) | |
39 | _float_nosign_noexp_re = re.compile(r'(\d*\.?\d+)', re.U) | |
40 | _float_sign_exp_re_c = re.compile(r'([-+]?\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U) | |
41 | _float_nosign_exp_re_c = re.compile(r'(\d*[.,]?\d+(?:[eE][-+]?\d+)?)', re.U) | |
42 | _float_sign_noexp_re_c = re.compile(r'([-+]?\d*[.,]?\d+)', re.U) | |
43 | _float_nosign_noexp_re_c = re.compile(r'(\d*[.,]?\d+)', re.U) | |
44 | ||
45 | # Integer regexes | |
46 | _int_nosign_re = re.compile(r'(\d+)', re.U) | |
47 | _int_sign_re = re.compile(r'([-+]?\d+)', re.U) | |
48 | ||
49 | # This dict will help select the correct regex and number conversion function. | |
50 | _regex_and_num_function_chooser = { | |
51 | (ns.F, '.'): (_float_sign_exp_re, fast_float), | |
52 | (ns.F | ns.N, '.'): (_float_sign_noexp_re, fast_float), | |
53 | (ns.F | ns.U, '.'): (_float_nosign_exp_re, fast_float), | |
54 | (ns.F | ns.U | ns.N, '.'): (_float_nosign_noexp_re, fast_float), | |
55 | (ns.I, '.'): (_int_sign_re, fast_int), | |
56 | (ns.I | ns.N, '.'): (_int_sign_re, fast_int), | |
57 | (ns.I | ns.U, '.'): (_int_nosign_re, fast_int), | |
58 | (ns.I | ns.U | ns.N, '.'): (_int_nosign_re, fast_int), | |
59 | (ns.F, ','): (_float_sign_exp_re_c, fast_float), | |
60 | (ns.F | ns.N, ','): (_float_sign_noexp_re_c, fast_float), | |
61 | (ns.F | ns.U, ','): (_float_nosign_exp_re_c, fast_float), | |
62 | (ns.F | ns.U | ns.N, ','): (_float_nosign_noexp_re_c, fast_float), | |
63 | (ns.I, ','): (_int_sign_re, fast_int), | |
64 | (ns.I | ns.N, ','): (_int_sign_re, fast_int), | |
65 | (ns.I | ns.U, ','): (_int_nosign_re, fast_int), | |
66 | (ns.I | ns.U | ns.N, ','): (_int_nosign_re, fast_int), | |
67 | } | |
68 | ||
69 | ||
70 | def _args_to_enum(number_type, signed, exp, as_path, py3_safe): | |
71 | """A function to convert input booleans to an enum-type argument.""" | |
72 | alg = 0 | |
73 | if number_type is not float: | |
74 | msg = "The 'number_type' argument is depreciated as of 3.5.0, " | |
75 | msg += "please use 'alg=ns.FLOAT', 'alg=ns.INT', or 'alg=ns.VERSION'" | |
76 | warn(msg, DeprecationWarning) | |
77 | alg |= (_nsdict['INT'] * bool(number_type in (int, None))) | |
78 | alg |= (_nsdict['UNSIGNED'] * (number_type is None)) | |
79 | if signed is not None: | |
80 | msg = "The 'signed' argument is depreciated as of 3.5.0, " | |
81 | msg += "please use 'alg=ns.UNSIGNED'." | |
82 | warn(msg, DeprecationWarning) | |
83 | alg |= (_nsdict['UNSIGNED'] * (not signed)) | |
84 | if exp is not None: | |
85 | msg = "The 'exp' argument is depreciated as of 3.5.0, " | |
86 | msg += "please use 'alg=ns.NOEXP'." | |
87 | warn(msg, DeprecationWarning) | |
88 | alg |= (_nsdict['NOEXP'] * (not exp)) | |
89 | if as_path is not None: | |
90 | msg = "The 'as_path' argument is depreciated as of 3.5.0, " | |
91 | msg += "please use 'alg=ns.PATH'." | |
92 | warn(msg, DeprecationWarning) | |
93 | alg |= (_nsdict['PATH'] * as_path) | |
94 | if py3_safe is not None: | |
95 | msg = "The 'py3_safe' argument is depreciated as of 3.5.0, " | |
96 | msg += "please use 'alg=ns.TYPESAFE'." | |
97 | warn(msg, DeprecationWarning) | |
98 | alg |= (_nsdict['TYPESAFE'] * py3_safe) | |
99 | return alg | |
100 | ||
101 | ||
102 | def _input_parser(s, regex, numconv, py3_safe, use_locale, group_letters): | |
103 | """Helper to parse the string input into numbers and strings.""" | |
104 | ||
105 | # Split the input string by numbers. | |
106 | # If the input is not a string, TypeError is raised. | |
107 | s = regex.split(s) | |
108 | ||
109 | # Now convert the numbers to numbers, and leave strings as strings. | |
110 | # Take into account locale if needed, and group letters if needed. | |
111 | # Remove empty strings from the list. | |
112 | if use_locale: | |
113 | s = [locale_convert(x, numconv, group_letters) for x in s if x] | |
114 | elif group_letters: | |
115 | s = [grouper(x, numconv) for x in s if x] | |
116 | else: | |
117 | s = [numconv(x) for x in s if x] | |
118 | ||
119 | # If the list begins with a number, lead with an empty string. | |
120 | # This is used to get around the "unorderable types" issue. | |
121 | if not s: # Return empty tuple for empty results. | |
122 | return () | |
123 | elif isreal(s[0]): | |
124 | s = [''] + s | |
125 | ||
126 | # The _py3_safe function inserts "" between numbers in the list, | |
127 | # and is used to get around "unorderable types" in complex cases. | |
128 | # It is a separate function that needs to be requested specifically | |
129 | # because it is expensive to call. | |
130 | return _py3_safe(s) if py3_safe else s | |
131 | ||
132 | ||
133 | def _path_splitter(s, _d_match=re.compile(r'\.\d').match): | |
134 | """Split a string into its path components. Assumes a string is a path.""" | |
135 | path_parts = [] | |
136 | p_append = path_parts.append | |
137 | path_location = s | |
138 | ||
139 | # Continue splitting the path from the back until we have reached | |
140 | # '..' or '.', or until there is nothing left to split. | |
141 | while path_location != curdir and path_location != pardir: | |
142 | parent_path = path_location | |
143 | path_location, child_path = split(parent_path) | |
144 | if path_location == parent_path: | |
145 | break | |
146 | p_append(child_path) | |
147 | ||
148 | # This last append is the base path. | |
149 | # Only append if the string is non-empty. | |
150 | if path_location: | |
151 | p_append(path_location) | |
152 | ||
153 | # We created this list in reversed order, so we now correct the order. | |
154 | path_parts.reverse() | |
155 | ||
156 | # Now, split off the file extensions using a similar method to above. | |
157 | # Continue splitting off file extensions until we reach a decimal number | |
158 | # or there are no more extensions. | |
159 | base = path_parts.pop() | |
160 | base_parts = [] | |
161 | b_append = base_parts.append | |
162 | while True: | |
163 | front = base | |
164 | base, ext = splitext(front) | |
165 | if _d_match(ext) or not ext: | |
166 | # Reset base to before the split if the split is invalid. | |
167 | base = front | |
168 | break | |
169 | b_append(ext) | |
170 | b_append(base) | |
171 | base_parts.reverse() | |
172 | ||
173 | # Return the split parent paths and then the split basename. | |
174 | return path_parts + base_parts | |
175 | ||
176 | ||
177 | def _py3_safe(parsed_list): | |
178 | """Insert '' between two numbers.""" | |
179 | length = len(parsed_list) | |
180 | if length < 2: | |
181 | return parsed_list | |
182 | else: | |
183 | new_list = [parsed_list[0]] | |
184 | nl_append = new_list.append | |
185 | for before, after in py23_zip(islice(parsed_list, 0, length-1), | |
186 | islice(parsed_list, 1, None)): | |
187 | if isreal(before) and isreal(after): | |
188 | nl_append("") | |
189 | nl_append(after) | |
190 | return new_list | |
191 | ||
192 | ||
193 | def _natsort_key(val, key, alg): | |
194 | """\ | |
195 | Key to sort strings and numbers naturally. | |
196 | ||
197 | It works by separating out the numbers from the strings. This function for | |
198 | internal use only. See the natsort_keygen documentation for details of each | |
199 | parameter. | |
200 | ||
201 | Parameters | |
202 | ---------- | |
203 | val : {str, unicode} | |
204 | key : callable | |
205 | alg : ns enum | |
206 | ||
207 | Returns | |
208 | ------- | |
209 | out : tuple | |
210 | The modified value with numbers extracted. | |
211 | ||
212 | """ | |
213 | ||
214 | # Convert the arguments to the proper input tuple | |
215 | try: | |
216 | use_locale = alg & _nsdict['LOCALE'] | |
217 | inp_options = (alg & _NUMBER_ALGORITHMS, | |
218 | localeconv()['decimal_point'] if use_locale else '.') | |
219 | except TypeError: | |
220 | msg = "_natsort_key: 'alg' argument must be from the enum 'ns'" | |
221 | raise ValueError(msg+', got {0}'.format(py23_str(alg))) | |
222 | ||
223 | # Get the proper regex and conversion function. | |
224 | try: | |
225 | regex, num_function = _regex_and_num_function_chooser[inp_options] | |
226 | except KeyError: # pragma: no cover | |
227 | if inp_options[1] not in ('.', ','): # pragma: no cover | |
228 | raise ValueError("_natsort_key: currently natsort only supports " | |
229 | "the decimal separators '.' and ','. " | |
230 | "Please file a bug report.") | |
231 | else: | |
232 | raise | |
233 | else: | |
234 | # Apply key if needed. | |
235 | if key is not None: | |
236 | val = key(val) | |
237 | ||
238 | # If this is a path, convert it. | |
239 | # An AttributeError is raised if not a string. | |
240 | split_as_path = False | |
241 | if alg & _nsdict['PATH']: | |
242 | try: | |
243 | val = _path_splitter(val) | |
244 | except AttributeError: | |
245 | pass | |
246 | else: | |
247 | # Record that this string was split as a path so that | |
248 | # we don't set PATH in the recursive call. | |
249 | split_as_path = True | |
250 | ||
251 | # Assume the input are strings, which is the most common case. | |
252 | # Apply the string modification if needed. | |
253 | try: | |
254 | if alg & _nsdict['LOWERCASEFIRST']: | |
255 | val = val.swapcase() | |
256 | if alg & _nsdict['IGNORECASE']: | |
257 | val = val.lower() | |
258 | return tuple(_input_parser(val, | |
259 | regex, | |
260 | num_function, | |
261 | alg & _nsdict['TYPESAFE'], | |
262 | use_locale, | |
263 | alg & _nsdict['GROUPLETTERS'])) | |
264 | except (TypeError, AttributeError): | |
265 | # If not strings, assume it is an iterable that must | |
266 | # be parsed recursively. Do not apply the key recursively. | |
267 | # If this string was split as a path, turn off 'PATH'. | |
268 | try: | |
269 | was_path = alg & _nsdict['PATH'] | |
270 | newalg = alg & _ALL_BUT_PATH | |
271 | newalg |= (was_path * (not split_as_path)) | |
272 | return tuple([_natsort_key(x, None, newalg) for x in val]) | |
273 | # If there is still an error, it must be a number. | |
274 | # Return as-is, with a leading empty string. | |
275 | except TypeError: | |
276 | return (('', val,),) if alg & _nsdict['PATH'] else ('', val,) |
8 | 8 | natsort/py23compat.py UndefinedName |
9 | 9 | natsort/__init__.py UnusedImport |
10 | 10 | docs/source/conf.py ALL |
11 | test_natsort/test_natsort.py UnusedImport RedefinedWhileUnused | |
12 | test_natsort/test_locale_help.py UnusedImport RedefinedWhileUnused | |
11 | 13 | |
12 | 14 | pep8ignore = |
13 | 15 | test_natsort/test_natsort.py E501 E241 E221 |
16 | test_natsort/test_utils.py E501 E241 E221 | |
14 | 17 | docs/source/conf.py ALL |
20 | 20 | def run_tests(self): |
21 | 21 | # import here, cause outside the eggs aren't loaded |
22 | 22 | import pytest |
23 | err1 = pytest.main(['--cov', 'natsort', '--flakes', '--pep8']) | |
23 | err1 = pytest.main(['--cov', 'natsort', | |
24 | '--cov-report', 'term-missing', | |
25 | '--flakes', '--pep8']) | |
24 | 26 | err2 = pytest.main(['--doctest-modules', 'natsort']) |
25 | 27 | err3 = pytest.main(['README.rst', |
26 | 28 | 'docs/source/intro.rst', |
66 | 68 | install_requires=REQUIRES, |
67 | 69 | packages=['natsort'], |
68 | 70 | entry_points={'console_scripts': ['natsort = natsort.__main__:main']}, |
69 | tests_require=['pytest', 'pytest-pep8', 'pytest-flakes', 'pytest-cov'], | |
71 | tests_require=['pytest', 'pytest-pep8', | |
72 | 'pytest-flakes', 'pytest-cov'], | |
70 | 73 | cmdclass={'test': PyTest}, |
71 | 74 | description=DESCRIPTION, |
72 | 75 | long_description=LONG_DESCRIPTION, |
0 | 0 | # -*- coding: utf-8 -*- |
1 | 1 | """\ |
2 | This file contains functions to stress-test natsort. | |
2 | This file contains functions to stress-test natsort, looking | |
3 | for cases that raise an unknown exception. | |
3 | 4 | """ |
4 | 5 | from random import randint, sample, choice |
5 | 6 | from string import printable |
0 | # -*- coding: utf-8 -*- | |
1 | """\ | |
2 | Test the locale help module. | |
3 | """ | |
4 | import locale | |
5 | from natsort.fake_fastnumbers import fast_float | |
6 | from natsort.locale_help import grouper, locale_convert, use_pyicu | |
7 | ||
8 | if use_pyicu: | |
9 | from natsort.locale_help import get_pyicu_transform | |
10 | from locale import getlocale | |
11 | else: | |
12 | from natsort.locale_help import strxfrm | |
13 | ||
14 | ||
15 | def test_grouper(): | |
16 | assert grouper('HELLO', fast_float) == 'hHeElLlLoO' | |
17 | assert grouper('hello', fast_float) == 'hheelllloo' | |
18 | assert grouper('45.8e-2', fast_float) == 45.8e-2 | |
19 | ||
20 | ||
21 | def test_locale_convert(): | |
22 | locale.setlocale(locale.LC_NUMERIC, 'en_US.UTF-8') | |
23 | if use_pyicu: | |
24 | from natsort.locale_help import get_pyicu_transform | |
25 | from locale import getlocale | |
26 | strxfrm = get_pyicu_transform(getlocale()) | |
27 | else: | |
28 | from natsort.locale_help import strxfrm | |
29 | assert locale_convert('45.8', fast_float, False) == 45.8 | |
30 | assert locale_convert('45,8', fast_float, False) == strxfrm('45,8') | |
31 | assert locale_convert('hello', fast_float, False) == strxfrm('hello') | |
32 | assert locale_convert('hello', fast_float, True) == strxfrm('hheelllloo') | |
33 | assert locale_convert('45,8', fast_float, True) == strxfrm('4455,,88') | |
34 | ||
35 | locale.setlocale(locale.LC_NUMERIC, 'de_DE.UTF-8') | |
36 | if use_pyicu: | |
37 | strxfrm = get_pyicu_transform(getlocale()) | |
38 | assert locale_convert('45.8', fast_float, False) == 45.8 | |
39 | assert locale_convert('45,8', fast_float, False) == 45.8 | |
40 | assert locale_convert('hello', fast_float, False) == strxfrm('hello') | |
41 | assert locale_convert('hello', fast_float, True) == strxfrm('hheelllloo') | |
42 | ||
43 | locale.setlocale(locale.LC_NUMERIC, '') |
205 | 205 | self.signed = True |
206 | 206 | self.exp = True |
207 | 207 | self.paths = as_path |
208 | self.locale = 0 | |
208 | 209 | |
209 | 210 | entries = ['tmp/a57/path2', |
210 | 211 | 'tmp/a23/path1', |
2 | 2 | Here are a collection of examples of how this module can be used. |
3 | 3 | See the README or the natsort homepage for more details. |
4 | 4 | """ |
5 | from __future__ import unicode_literals | |
5 | 6 | import warnings |
7 | import locale | |
6 | 8 | from operator import itemgetter |
7 | 9 | from pytest import raises |
8 | from natsort import natsorted, index_natsorted, natsort_key, versorted, index_versorted, natsort_keygen, order_by_index | |
9 | from natsort.natsort import _number_finder, _py3_safe, _natsort_key | |
10 | from natsort.natsort import float_sign_exp_re, float_nosign_exp_re, float_sign_noexp_re | |
11 | from natsort.natsort import float_nosign_noexp_re, int_nosign_re, int_sign_re | |
12 | ||
13 | try: | |
14 | from fastnumbers import fast_float, fast_int | |
15 | except ImportError: | |
16 | from natsort.fake_fastnumbers import fast_float, fast_int | |
17 | ||
18 | ||
19 | def test_number_finder(): | |
20 | ||
21 | assert _number_finder('a5+5.034e-1', float_sign_exp_re, fast_float, False) == ['a', 5.0, 0.5034] | |
22 | assert _number_finder('a5+5.034e-1', float_nosign_exp_re, fast_float, False) == ['a', 5.0, '+', 0.5034] | |
23 | assert _number_finder('a5+5.034e-1', float_sign_noexp_re, fast_float, False) == ['a', 5.0, 5.034, 'e', -1.0] | |
24 | assert _number_finder('a5+5.034e-1', float_nosign_noexp_re, fast_float, False) == ['a', 5.0, '+', 5.034, 'e-', 1.0] | |
25 | assert _number_finder('a5+5.034e-1', int_nosign_re, fast_int, False) == ['a', 5, '+', 5, '.', 34, 'e-', 1] | |
26 | assert _number_finder('a5+5.034e-1', int_sign_re, fast_int, False) == ['a', 5, 5, '.', 34, 'e', -1] | |
27 | ||
28 | assert _number_finder('a5+5.034e-1', float_sign_exp_re, fast_float, True) == ['a', 5.0, '', 0.5034] | |
29 | assert _number_finder('a5+5.034e-1', float_nosign_exp_re, fast_float, True) == ['a', 5.0, '+', 0.5034] | |
30 | assert _number_finder('a5+5.034e-1', float_sign_noexp_re, fast_float, True) == ['a', 5.0, '', 5.034, 'e', -1.0] | |
31 | assert _number_finder('a5+5.034e-1', float_nosign_noexp_re, fast_float, True) == ['a', 5.0, '+', 5.034, 'e-', 1.0] | |
32 | assert _number_finder('a5+5.034e-1', int_nosign_re, fast_int, True) == ['a', 5, '+', 5, '.', 34, 'e-', 1] | |
33 | assert _number_finder('a5+5.034e-1', int_sign_re, fast_int, True) == ['a', 5, '', 5, '.', 34, 'e', -1] | |
34 | ||
35 | assert _number_finder('6a5+5.034e-1', float_sign_exp_re, fast_float, False) == ['', 6.0, 'a', 5.0, 0.5034] | |
36 | assert _number_finder('6a5+5.034e-1', float_sign_exp_re, fast_float, True) == ['', 6.0, 'a', 5.0, '', 0.5034] | |
37 | ||
38 | ||
39 | def test_py3_safe(): | |
40 | ||
41 | assert _py3_safe(['a', 'b', 'c']) == ['a', 'b', 'c'] | |
42 | assert _py3_safe(['a']) == ['a'] | |
43 | assert _py3_safe(['a', 5]) == ['a', 5] | |
44 | assert _py3_safe([5, 9]) == [5, '', 9] | |
45 | ||
46 | ||
47 | def test_natsort_key_private(): | |
48 | ||
49 | a = ['num3', 'num5', 'num2'] | |
50 | a.sort(key=_natsort_key) | |
51 | assert a == ['num2', 'num3', 'num5'] | |
52 | ||
53 | # The below illustrates how the key works, and how the different options affect sorting. | |
54 | assert _natsort_key('a-5.034e2') == ('a', -503.4) | |
55 | assert _natsort_key('a-5.034e2', number_type=float, signed=True, exp=True) == ('a', -503.4) | |
56 | assert _natsort_key('a-5.034e2', number_type=float, signed=True, exp=False) == ('a', -5.034, 'e', 2.0) | |
57 | assert _natsort_key('a-5.034e2', number_type=float, signed=False, exp=True) == ('a-', 503.4) | |
58 | assert _natsort_key('a-5.034e2', number_type=float, signed=False, exp=False) == ('a-', 5.034, 'e', 2.0) | |
59 | assert _natsort_key('a-5.034e2', number_type=int) == ('a', -5, '.', 34, 'e', 2) | |
60 | assert _natsort_key('a-5.034e2', number_type=int, signed=False) == ('a-', 5, '.', 34, 'e', 2) | |
61 | assert _natsort_key('a-5.034e2', number_type=None) == _natsort_key('a-5.034e2', number_type=int, signed=False) | |
62 | assert _natsort_key('a-5.034e2', key=lambda x: x.upper()) == ('A', -503.4) | |
63 | ||
64 | # Iterables are parsed recursively so you can sort lists of lists. | |
65 | assert _natsort_key(('a1', 'a-5.034e2')) == (('a', 1.0), ('a', -503.4)) | |
66 | assert _natsort_key(('a1', 'a-5.034e2'), number_type=None) == (('a', 1), ('a-', 5, '.', 34, 'e', 2)) | |
67 | # A key is applied before recursion, but not in the recursive calls. | |
68 | assert _natsort_key(('a1', 'a-5.034e2'), key=itemgetter(1)) == ('a', -503.4) | |
69 | ||
70 | # Strings that lead with a number get an empty string at the front of the tuple. | |
71 | # This is designed to get around the "unorderable types" issue. | |
72 | assert _natsort_key(('15a', '6')) == (('', 15.0, 'a'), ('', 6.0)) | |
73 | assert _natsort_key(10) == ('', 10) | |
74 | ||
75 | # Turn on as_path to split a file path into components | |
76 | assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', as_path=True) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) | |
77 | assert _natsort_key('../Folder (10)/file (2).tar.gz', as_path=True) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) | |
78 | assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', as_path=True) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) | |
79 | ||
80 | # It gracefully handles as_path for numeric input by putting an extra tuple around it | |
81 | # so it will sort against the other as_path results. | |
82 | assert _natsort_key(10, as_path=True) == (('', 10),) | |
83 | # as_path also handles recursion well. | |
84 | assert _natsort_key(('/Folder', '/Folder (1)'), as_path=True) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1.0, ')'))) | |
85 | ||
86 | # Turn on py3_safe to put a '' between adjacent numbers | |
87 | assert _natsort_key('43h7+3', py3_safe=True) == ('', 43.0, 'h', 7.0, '', 3.0) | |
88 | ||
89 | # Invalid arguments give the correct response | |
90 | with raises(ValueError) as err: | |
91 | _natsort_key('a', number_type='float') | |
92 | assert str(err.value) == "_natsort_key: 'number_type' parameter 'float' invalid" | |
93 | with raises(ValueError) as err: | |
94 | _natsort_key('a', signed='True') | |
95 | assert str(err.value) == "_natsort_key: 'signed' parameter 'True' invalid" | |
96 | with raises(ValueError) as err: | |
97 | _natsort_key('a', exp='False') | |
98 | assert str(err.value) == "_natsort_key: 'exp' parameter 'False' invalid" | |
10 | from natsort import natsorted, index_natsorted, natsort_key, versorted, index_versorted | |
11 | from natsort import humansorted, index_humansorted, natsort_keygen, order_by_index, ns | |
12 | from natsort.utils import _natsort_key | |
99 | 13 | |
100 | 14 | |
101 | 15 | def test_natsort_key_public(): |
104 | 18 | # But it raises a deprecation warning |
105 | 19 | with warnings.catch_warnings(record=True) as w: |
106 | 20 | warnings.simplefilter("always") |
107 | assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2') | |
21 | assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2', key=None, alg=ns.F) | |
108 | 22 | assert len(w) == 1 |
109 | 23 | assert "natsort_key is depreciated as of 3.4.0, please use natsort_keygen" in str(w[-1].message) |
110 | assert natsort_key('a-5.034e2', number_type=float, signed=False, exp=False) == _natsort_key('a-5.034e2', number_type=float, signed=False, exp=False) | |
24 | assert natsort_key('a-5.034e2', number_type=float, signed=False, exp=False) == _natsort_key('a-5.034e2', key=None, alg=ns.F | ns.U | ns.N) | |
25 | assert natsort_key('a-5.034e2', alg=ns.F | ns.U | ns.N) == _natsort_key('a-5.034e2', key=None, alg=ns.F | ns.U | ns.N) | |
111 | 26 | |
112 | 27 | # It is called for each element in a list when sorting |
113 | 28 | with warnings.catch_warnings(record=True) as w: |
121 | 36 | |
122 | 37 | # Creates equivalent natsort keys |
123 | 38 | a = 'a-5.034e1' |
124 | assert natsort_keygen()(a) == _natsort_key(a) | |
125 | assert natsort_keygen(signed=False)(a) == _natsort_key(a, signed=False) | |
126 | assert natsort_keygen(exp=False)(a) == _natsort_key(a, exp=False) | |
127 | assert natsort_keygen(signed=False, exp=False)(a) == _natsort_key(a, signed=False, exp=False) | |
128 | assert natsort_keygen(number_type=int)(a) == _natsort_key(a, number_type=int) | |
129 | assert natsort_keygen(number_type=int, signed=False)(a) == _natsort_key(a, number_type=int, signed=False) | |
130 | assert natsort_keygen(number_type=None)(a) == _natsort_key(a, number_type=None) | |
131 | assert natsort_keygen(as_path=True)(a) == _natsort_key(a, as_path=True) | |
39 | assert natsort_keygen()(a) == _natsort_key(a, key=None, alg=ns.F) | |
40 | assert natsort_keygen(alg=ns.UNSIGNED)(a) == _natsort_key(a, key=None, alg=ns.U) | |
41 | assert natsort_keygen(alg=ns.NOEXP)(a) == _natsort_key(a, key=None, alg=ns.N) | |
42 | assert natsort_keygen(alg=ns.U | ns.N)(a) == _natsort_key(a, key=None, alg=ns.U | ns.N) | |
43 | assert natsort_keygen(alg=ns.INT)(a) == _natsort_key(a, key=None, alg=ns.INT) | |
44 | assert natsort_keygen(alg=ns.I | ns.U)(a) == _natsort_key(a, key=None, alg=ns.I | ns.U) | |
45 | assert natsort_keygen(alg=ns.VERSION)(a) == _natsort_key(a, key=None, alg=ns.V) | |
46 | assert natsort_keygen(alg=ns.PATH)(a) == _natsort_key(a, key=None, alg=ns.PATH) | |
132 | 47 | |
133 | 48 | # Custom keys are more straightforward with keygen |
134 | 49 | f1 = natsort_keygen(key=lambda x: x.upper()) |
135 | f2 = lambda x: _natsort_key(x, key=lambda y: y.upper()) | |
50 | f2 = lambda x: _natsort_key(x, key=lambda y: y.upper(), alg=ns.F) | |
136 | 51 | assert f1(a) == f2(a) |
137 | 52 | |
138 | 53 | # It also makes sorting lists in-place easier (no lambdas!) |
139 | 54 | a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] |
140 | 55 | b = a[:] |
141 | a.sort(key=natsort_keygen(number_type=int)) | |
142 | assert a == natsorted(b, number_type=int) | |
56 | a.sort(key=natsort_keygen(alg=ns.I)) | |
57 | assert a == natsorted(b, alg=ns.I) | |
143 | 58 | |
144 | 59 | |
145 | 60 | def test_natsorted(): |
150 | 65 | |
151 | 66 | # Number types |
152 | 67 | a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] |
153 | assert natsorted(a) == ['a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.'] | |
154 | assert natsorted(a, number_type=float, exp=False) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.'] | |
155 | assert natsorted(a, number_type=int) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] | |
156 | assert natsorted(a, number_type=None) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] | |
68 | assert natsorted(a) == ['a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.'] | |
69 | assert natsorted(a, alg=ns.NOEXP | ns.FLOAT) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.'] | |
70 | assert natsorted(a, alg=ns.INT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] | |
71 | assert natsorted(a, alg=ns.DIGIT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] | |
157 | 72 | |
158 | 73 | # Signed option |
159 | 74 | a = ['a-5', 'a7', 'a+2'] |
160 | assert natsorted(a) == ['a-5', 'a+2', 'a7'] | |
161 | assert natsorted(a, signed=False) == ['a7', 'a+2', 'a-5'] | |
75 | assert natsorted(a) == ['a-5', 'a+2', 'a7'] | |
76 | assert natsorted(a, alg=ns.UNSIGNED) == ['a7', 'a+2', 'a-5'] | |
162 | 77 | |
163 | 78 | # Number type == None |
164 | 79 | a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] |
165 | assert natsorted(a) == ['1.10.1', '1.11', '1.11.4', '1.9.9a', '1.9.9b'] | |
166 | assert natsorted(a, number_type=None) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4'] | |
80 | assert natsorted(a) == ['1.10.1', '1.11', '1.11.4', '1.9.9a', '1.9.9b'] | |
81 | assert natsorted(a, alg=ns.DIGIT) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4'] | |
167 | 82 | |
168 | 83 | # You can mix types with natsorted. This can get around the new |
169 | 84 | # 'unorderable types' issue with Python 3. |
202 | 117 | '/p/Folder (1)/file.tar.gz', |
203 | 118 | '/p/Folder (10)/file.tar.gz', |
204 | 119 | '/p/Folder/file.tar.gz'] |
205 | assert natsorted(a, as_path=True) == ['/p/Folder/file.tar.gz', | |
206 | '/p/Folder (1)/file.tar.gz', | |
207 | '/p/Folder (1)/file (1).tar.gz', | |
208 | '/p/Folder (10)/file.tar.gz'] | |
120 | assert natsorted(a, alg=ns.PATH) == ['/p/Folder/file.tar.gz', | |
121 | '/p/Folder (1)/file.tar.gz', | |
122 | '/p/Folder (1)/file (1).tar.gz', | |
123 | '/p/Folder (10)/file.tar.gz'] | |
209 | 124 | |
210 | 125 | # You can sort paths and numbers, not that you'd want to |
211 | 126 | a = ['/Folder (9)/file.exe', 43] |
212 | assert natsorted(a, as_path=True) == [43, '/Folder (9)/file.exe'] | |
127 | assert natsorted(a, alg=ns.PATH) == [43, '/Folder (9)/file.exe'] | |
128 | ||
129 | # You can modify how case is interpreted in your sorting. | |
130 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
131 | assert natsorted(a) == ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn'] | |
132 | assert natsorted(a, alg=ns.IGNORECASE) == ['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn'] | |
133 | assert natsorted(a, alg=ns.LOWERCASEFIRST) == ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn'] | |
134 | assert natsorted(a, alg=ns.GROUPLETTERS) == ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn'] | |
135 | assert natsorted(a, alg=ns.G | ns.LF) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] | |
136 | ||
137 | b = [('A5', 'a6'), ('a3', 'a1')] | |
138 | assert natsorted(b) == [('A5', 'a6'), ('a3', 'a1')] | |
139 | assert natsorted(b, alg=ns.LOWERCASEFIRST) == [('a3', 'a1'), ('A5', 'a6')] | |
140 | assert natsorted(b, alg=ns.IGNORECASE) == [('a3', 'a1'), ('A5', 'a6')] | |
141 | ||
142 | # You can also do locale-aware sorting | |
143 | locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) | |
144 | assert natsorted(a, alg=ns.LOCALE) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] | |
145 | a = ['c', 'ä', 'b', 'a5,6', 'a5,50'] | |
146 | assert natsorted(a, alg=ns.LOCALE) == ['a5,6', 'a5,50', 'ä', 'b', 'c'] | |
147 | ||
148 | locale.setlocale(locale.LC_ALL, str('de_DE.UTF-8')) | |
149 | assert natsorted(a, alg=ns.LOCALE) == ['a5,50', 'a5,6', 'ä', 'b', 'c'] | |
150 | locale.setlocale(locale.LC_ALL, str('')) | |
213 | 151 | |
214 | 152 | |
215 | 153 | def test_versorted(): |
216 | 154 | |
217 | 155 | a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] |
218 | assert versorted(a) == natsorted(a, number_type=None) | |
156 | assert versorted(a) == natsorted(a, alg=ns.VERSION) | |
219 | 157 | assert versorted(a, reverse=True) == versorted(a)[::-1] |
220 | 158 | a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'), |
221 | 159 | ('a', '1.11.4'), ('a', '1.10.1')] |
231 | 169 | '/p/Folder (1)/file1.1.0.tar.gz', |
232 | 170 | '/p/Folder (10)/file1.1.0.tar.gz', |
233 | 171 | '/p/Folder/file1.1.0.tar.gz'] |
234 | assert versorted(a, as_path=True) == ['/p/Folder/file1.1.0.tar.gz', | |
235 | '/p/Folder (1)/file1.1.0.tar.gz', | |
236 | '/p/Folder (1)/file1.1.0 (1).tar.gz', | |
237 | '/p/Folder (10)/file1.1.0.tar.gz'] | |
172 | assert versorted(a, alg=ns.PATH) == ['/p/Folder/file1.1.0.tar.gz', | |
173 | '/p/Folder (1)/file1.1.0.tar.gz', | |
174 | '/p/Folder (1)/file1.1.0 (1).tar.gz', | |
175 | '/p/Folder (10)/file1.1.0.tar.gz'] | |
176 | ||
177 | ||
178 | def test_humansorted(): | |
179 | ||
180 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
181 | assert humansorted(a) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] | |
182 | assert humansorted(a) == natsorted(a, alg=ns.LOCALE) | |
183 | assert humansorted(a, reverse=True) == humansorted(a)[::-1] | |
238 | 184 | |
239 | 185 | |
240 | 186 | def test_index_natsorted(): |
264 | 210 | a = ['/p/Folder (10)/', |
265 | 211 | '/p/Folder/', |
266 | 212 | '/p/Folder (1)/'] |
267 | assert index_natsorted(a, as_path=True) == [1, 2, 0] | |
213 | assert index_natsorted(a, alg=ns.PATH) == [1, 2, 0] | |
268 | 214 | |
269 | 215 | |
270 | 216 | def test_index_versorted(): |
271 | 217 | |
272 | 218 | a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] |
273 | assert index_versorted(a) == index_natsorted(a, number_type=None) | |
219 | assert index_versorted(a) == index_natsorted(a, alg=ns.VERSION) | |
274 | 220 | assert index_versorted(a, reverse=True) == index_versorted(a)[::-1] |
275 | 221 | a = [('a', '1.9.9a'), ('a', '1.11'), ('a', '1.9.9b'), |
276 | 222 | ('a', '1.11.4'), ('a', '1.10.1')] |
281 | 227 | '/p/Folder/file1.1.0.tar.gz', |
282 | 228 | '/p/Folder (1)/file1.1.0 (1).tar.gz', |
283 | 229 | '/p/Folder (1)/file1.1.0.tar.gz'] |
284 | assert index_versorted(a, as_path=True) == [1, 3, 2, 0] | |
230 | assert index_versorted(a, alg=ns.PATH) == [1, 3, 2, 0] | |
231 | ||
232 | ||
233 | def test_index_humansorted(): | |
234 | ||
235 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
236 | assert index_humansorted(a) == [4, 0, 5, 3, 1, 2] | |
237 | assert index_humansorted(a) == index_natsorted(a, alg=ns.LOCALE) | |
238 | assert index_humansorted(a, reverse=True) == index_humansorted(a)[::-1] | |
285 | 239 | |
286 | 240 | |
287 | 241 | def test_order_by_index(): |
0 | # -*- coding: utf-8 -*- | |
1 | """These test the utils.py functions.""" | |
2 | ||
3 | import locale | |
4 | from operator import itemgetter | |
5 | from pytest import raises | |
6 | from natsort.ns_enum import ns | |
7 | from natsort.utils import _input_parser, _py3_safe, _natsort_key, _args_to_enum | |
8 | from natsort.utils import _float_sign_exp_re, _float_nosign_exp_re, _float_sign_noexp_re | |
9 | from natsort.utils import _float_nosign_noexp_re, _int_nosign_re, _int_sign_re | |
10 | from natsort.locale_help import use_pyicu | |
11 | ||
12 | try: | |
13 | from fastnumbers import fast_float, fast_int | |
14 | except ImportError: | |
15 | from natsort.fake_fastnumbers import fast_float, fast_int | |
16 | ||
17 | ||
18 | def test_args_to_enum(): | |
19 | ||
20 | assert _args_to_enum(float, True, True, False, False) == ns.F | |
21 | assert _args_to_enum(float, True, False, False, False) == ns.F | ns.N | |
22 | assert _args_to_enum(float, False, True, False, False) == ns.F | ns.U | |
23 | assert _args_to_enum(float, False, False, False, False) == ns.F | ns.U | ns.N | |
24 | assert _args_to_enum(float, True, True, True, True) == ns.F | ns.P | ns.T | |
25 | assert _args_to_enum(int, True, True, True, False) == ns.I | ns.P | |
26 | assert _args_to_enum(int, False, True, False, True) == ns.I | ns.U | ns.T | |
27 | assert _args_to_enum(None, True, True, False, False) == ns.I | ns.U | |
28 | ||
29 | ||
30 | def test_input_parser(): | |
31 | ||
32 | # fttt = (fast_float, True, True, True) | |
33 | # fttf = (fast_float, True, True, False) | |
34 | ftft = (fast_float, True, False, True) | |
35 | ftff = (fast_float, True, False, False) | |
36 | # fftt = (fast_float, False, True, True) | |
37 | # ffft = (fast_float, False, False, True) | |
38 | # fftf = (fast_float, False, True, False) | |
39 | ffff = (fast_float, False, False, False) | |
40 | ittt = (fast_int, True, True, True) | |
41 | ittf = (fast_int, True, True, False) | |
42 | itft = (fast_int, True, False, True) | |
43 | itff = (fast_int, True, False, False) | |
44 | # iftt = (fast_int, False, True, True) | |
45 | # ifft = (fast_int, False, False, True) | |
46 | # iftf = (fast_int, False, True, False) | |
47 | ifff = (fast_int, False, False, False) | |
48 | ||
49 | assert _input_parser('a5+5.034e-1', _float_sign_exp_re, *ffff) == ['a', 5.0, 0.5034] | |
50 | assert _input_parser('a5+5.034e-1', _float_nosign_exp_re, *ffff) == ['a', 5.0, '+', 0.5034] | |
51 | assert _input_parser('a5+5.034e-1', _float_sign_noexp_re, *ffff) == ['a', 5.0, 5.034, 'e', -1.0] | |
52 | assert _input_parser('a5+5.034e-1', _float_nosign_noexp_re, *ffff) == ['a', 5.0, '+', 5.034, 'e-', 1.0] | |
53 | assert _input_parser('a5+5.034e-1', _int_nosign_re, *ifff) == ['a', 5, '+', 5, '.', 34, 'e-', 1] | |
54 | assert _input_parser('a5+5.034e-1', _int_sign_re, *ifff) == ['a', 5, 5, '.', 34, 'e', -1] | |
55 | ||
56 | assert _input_parser('a5+5.034e-1', _float_sign_exp_re, *ftff) == ['a', 5.0, '', 0.5034] | |
57 | assert _input_parser('a5+5.034e-1', _float_nosign_exp_re, *ftff) == ['a', 5.0, '+', 0.5034] | |
58 | assert _input_parser('a5+5.034e-1', _float_sign_noexp_re, *ftff) == ['a', 5.0, '', 5.034, 'e', -1.0] | |
59 | assert _input_parser('a5+5.034e-1', _float_nosign_noexp_re, *ftff) == ['a', 5.0, '+', 5.034, 'e-', 1.0] | |
60 | assert _input_parser('a5+5.034e-1', _int_nosign_re, *itff) == ['a', 5, '+', 5, '.', 34, 'e-', 1] | |
61 | assert _input_parser('a5+5.034e-1', _int_sign_re, *itff) == ['a', 5, '', 5, '.', 34, 'e', -1] | |
62 | ||
63 | assert _input_parser('6a5+5.034e-1', _float_sign_exp_re, *ffff) == ['', 6.0, 'a', 5.0, 0.5034] | |
64 | assert _input_parser('6a5+5.034e-1', _float_sign_exp_re, *ftff) == ['', 6.0, 'a', 5.0, '', 0.5034] | |
65 | ||
66 | assert _input_parser('A5+5.034E-1', _float_sign_exp_re, *ftft) == ['aA', 5.0, '', 0.5034] | |
67 | assert _input_parser('A5+5.034E-1', _int_nosign_re, *itft) == ['aA', 5, '++', 5, '..', 34, 'eE--', 1] | |
68 | ||
69 | locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8')) | |
70 | if use_pyicu: | |
71 | from natsort.locale_help import get_pyicu_transform | |
72 | from locale import getlocale | |
73 | strxfrm = get_pyicu_transform(getlocale()) | |
74 | else: | |
75 | from natsort.locale_help import strxfrm | |
76 | assert _input_parser('A5+5.034E-1', _int_nosign_re, *ittf) == [strxfrm('A'), 5, strxfrm('+'), 5, strxfrm('.'), 34, strxfrm('E-'), 1] | |
77 | assert _input_parser('A5+5.034E-1', _int_nosign_re, *ittt) == [strxfrm('aA'), 5, strxfrm('++'), 5, strxfrm('..'), 34, strxfrm('eE--'), 1] | |
78 | locale.setlocale(locale.LC_NUMERIC, str('')) | |
79 | ||
80 | ||
81 | def test_py3_safe(): | |
82 | ||
83 | assert _py3_safe(['a', 'b', 'c']) == ['a', 'b', 'c'] | |
84 | assert _py3_safe(['a']) == ['a'] | |
85 | assert _py3_safe(['a', 5]) == ['a', 5] | |
86 | assert _py3_safe([5, 9]) == [5, '', 9] | |
87 | ||
88 | ||
89 | def test_natsort_key_private(): | |
90 | ||
91 | # The below illustrates how the key works, and how the different options affect sorting. | |
92 | assert _natsort_key('a-5.034e2', key=None, alg=ns.F) == ('a', -503.4) | |
93 | assert _natsort_key('a-5.034e2', key=None, alg=ns.FLOAT) == ('a', -503.4) | |
94 | assert _natsort_key('a-5.034e2', key=None, alg=ns.FLOAT | ns.NOEXP) == ('a', -5.034, 'e', 2.0) | |
95 | assert _natsort_key('a-5.034e2', key=None, alg=ns.NOEXP) == ('a', -5.034, 'e', 2.0) | |
96 | assert _natsort_key('a-5.034e2', key=None, alg=ns.UNSIGNED) == ('a-', 503.4) | |
97 | assert _natsort_key('a-5.034e2', key=None, alg=ns.UNSIGNED | ns.NOEXP) == ('a-', 5.034, 'e', 2.0) | |
98 | assert _natsort_key('a-5.034e2', key=None, alg=ns.INT) == ('a', -5, '.', 34, 'e', 2) | |
99 | assert _natsort_key('a-5.034e2', key=None, alg=ns.INT | ns.NOEXP) == ('a', -5, '.', 34, 'e', 2) | |
100 | assert _natsort_key('a-5.034e2', key=None, alg=ns.INT | ns.UNSIGNED) == ('a-', 5, '.', 34, 'e', 2) | |
101 | assert _natsort_key('a-5.034e2', key=None, alg=ns.VERSION) == _natsort_key('a-5.034e2', key=None, alg=ns.INT | ns.UNSIGNED) | |
102 | assert _natsort_key('a-5.034e2', key=None, alg=ns.DIGIT) == _natsort_key('a-5.034e2', key=None, alg=ns.VERSION) | |
103 | assert _natsort_key('a-5.034e2', key=lambda x: x.upper(), alg=ns.F) == ('A', -503.4) | |
104 | ||
105 | # Iterables are parsed recursively so you can sort lists of lists. | |
106 | assert _natsort_key(('a1', 'a-5.034e2'), key=None, alg=ns.F) == (('a', 1.0), ('a', -503.4)) | |
107 | assert _natsort_key(('a1', 'a-5.034e2'), key=None, alg=ns.V) == (('a', 1), ('a-', 5, '.', 34, 'e', 2)) | |
108 | # A key is applied before recursion, but not in the recursive calls. | |
109 | assert _natsort_key(('a1', 'a-5.034e2'), key=itemgetter(1), alg=ns.F) == ('a', -503.4) | |
110 | ||
111 | # Strings that lead with a number get an empty string at the front of the tuple. | |
112 | # This is designed to get around the "unorderable types" issue. | |
113 | assert _natsort_key(('15a', '6'), key=None, alg=ns.F) == (('', 15.0, 'a'), ('', 6.0)) | |
114 | assert _natsort_key(10, key=None, alg=ns.F) == ('', 10) | |
115 | ||
116 | # Turn on as_path to split a file path into components | |
117 | assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', key=None, alg=ns.PATH) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) | |
118 | assert _natsort_key('../Folder (10)/file (2).tar.gz', key=None, alg=ns.PATH) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) | |
119 | assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', key=None, alg=ns.PATH) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) | |
120 | ||
121 | # It gracefully handles as_path for numeric input by putting an extra tuple around it | |
122 | # so it will sort against the other as_path results. | |
123 | assert _natsort_key(10, key=None, alg=ns.PATH) == (('', 10),) | |
124 | # as_path also handles recursion well. | |
125 | assert _natsort_key(('/Folder', '/Folder (1)'), key=None, alg=ns.PATH) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1.0, ')'))) | |
126 | ||
127 | # Turn on py3_safe to put a '' between adjacent numbers | |
128 | assert _natsort_key('43h7+3', key=None, alg=ns.TYPESAFE) == ('', 43.0, 'h', 7.0, '', 3.0) | |
129 | ||
130 | # Invalid arguments give the correct response | |
131 | with raises(ValueError) as err: | |
132 | _natsort_key('a', key=None, alg='1') | |
133 | assert str(err.value) == "_natsort_key: 'alg' argument must be from the enum 'ns', got 1" | |
134 | ||
135 | # Changing the sort order of strings | |
136 | assert _natsort_key('Apple56', key=None, alg=ns.F) == ('Apple', 56.0) | |
137 | assert _natsort_key('Apple56', key=None, alg=ns.IGNORECASE) == ('apple', 56.0) | |
138 | assert _natsort_key('Apple56', key=None, alg=ns.LOWERCASEFIRST) == ('aPPLE', 56.0) | |
139 | assert _natsort_key('Apple56', key=None, alg=ns.GROUPLETTERS) == ('aAppppllee', 56.0) | |
140 | assert _natsort_key('Apple56', key=None, alg=ns.G | ns.LF) == ('aapPpPlLeE', 56.0) | |
141 | ||
142 | # Locale aware sorting | |
143 | locale.setlocale(locale.LC_NUMERIC, str('en_US.UTF-8')) | |
144 | if use_pyicu: | |
145 | from natsort.locale_help import get_pyicu_transform | |
146 | from locale import getlocale | |
147 | strxfrm = get_pyicu_transform(getlocale()) | |
148 | else: | |
149 | from natsort.locale_help import strxfrm | |
150 | assert _natsort_key('Apple56.5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.5) | |
151 | assert _natsort_key('Apple56,5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.0, strxfrm(','), 5.0) | |
152 | ||
153 | locale.setlocale(locale.LC_NUMERIC, str('de_DE.UTF-8')) | |
154 | if use_pyicu: | |
155 | strxfrm = get_pyicu_transform(getlocale()) | |
156 | assert _natsort_key('Apple56.5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.5) | |
157 | assert _natsort_key('Apple56,5', key=None, alg=ns.LOCALE) == (strxfrm('Apple'), 56.5) | |
158 | locale.setlocale(locale.LC_NUMERIC, str('')) |