Import upstream version 8.3.1
Debian Janitor
1 year, 1 month ago
14 | 14 | runs-on: ubuntu-latest |
15 | 15 | steps: |
16 | 16 | - name: Checkout code |
17 | uses: actions/checkout@v2 | |
17 | uses: actions/checkout@v3 | |
18 | 18 | |
19 | 19 | - name: Set up Python |
20 | uses: actions/setup-python@v2 | |
20 | uses: actions/setup-python@v4 | |
21 | 21 | with: |
22 | python-version: '3.6' | |
22 | python-version: '3.8' | |
23 | 23 | |
24 | 24 | - name: Install black |
25 | 25 | run: pip install black |
32 | 32 | runs-on: ubuntu-latest |
33 | 33 | steps: |
34 | 34 | - name: Checkout code |
35 | uses: actions/checkout@v2 | |
35 | uses: actions/checkout@v3 | |
36 | 36 | |
37 | 37 | - name: Set up Python |
38 | uses: actions/setup-python@v2 | |
38 | uses: actions/setup-python@v4 | |
39 | 39 | with: |
40 | python-version: '3.6' | |
40 | python-version: '3.8' | |
41 | 41 | |
42 | 42 | - name: Install Flake8 |
43 | 43 | run: pip install flake8 flake8-import-order flake8-bugbear pep8-naming |
50 | 50 | runs-on: ubuntu-latest |
51 | 51 | steps: |
52 | 52 | - name: Checkout code |
53 | uses: actions/checkout@v2 | |
53 | uses: actions/checkout@v3 | |
54 | 54 | |
55 | 55 | - name: Set up Python |
56 | uses: actions/setup-python@v2 | |
56 | uses: actions/setup-python@v4 | |
57 | 57 | with: |
58 | python-version: '3.6' | |
58 | python-version: '3.8' | |
59 | 59 | |
60 | 60 | - name: Install MyPy |
61 | 61 | run: pip install mypy hypothesis pytest pytest-mock fastnumbers |
68 | 68 | runs-on: ubuntu-latest |
69 | 69 | steps: |
70 | 70 | - name: Checkout code |
71 | uses: actions/checkout@v2 | |
71 | uses: actions/checkout@v3 | |
72 | 72 | |
73 | 73 | - name: Set up Python |
74 | uses: actions/setup-python@v2 | |
74 | uses: actions/setup-python@v4 | |
75 | 75 | with: |
76 | python-version: '3.6' | |
76 | python-version: '3.8' | |
77 | 77 | |
78 | 78 | - name: Install Validators |
79 | 79 | run: pip install twine check-manifest |
11 | 11 | runs-on: ubuntu-latest |
12 | 12 | steps: |
13 | 13 | - name: Checkout code |
14 | uses: actions/checkout@v2 | |
14 | uses: actions/checkout@v3 | |
15 | 15 | |
16 | 16 | - name: Set up Python |
17 | uses: actions/setup-python@v2 | |
17 | uses: actions/setup-python@v4 | |
18 | 18 | with: |
19 | 19 | python-version: 3.9 |
20 | 20 | |
21 | 21 | - name: Build Source Distribution and Wheel |
22 | 22 | run: | |
23 | 23 | pip install wheel |
24 | python setup.py sdist --format=gztar bdist_wheel | |
24 | python setup.py sdist --format=gztar | |
25 | pip wheel . -w dist | |
25 | 26 | |
26 | 27 | - name: Publish to PyPI |
27 | uses: pypa/gh-action-pypi-publish@master | |
28 | uses: pypa/gh-action-pypi-publish@release/v1 | |
28 | 29 | with: |
29 | 30 | user: __token__ |
30 | 31 | password: ${{ secrets.pypi_token_password }} |
14 | 14 | runs-on: ${{ matrix.os }} |
15 | 15 | strategy: |
16 | 16 | matrix: |
17 | python-version: [3.6, 3.7, 3.8, 3.9, "3.10"] | |
17 | python-version: [3.7, 3.8, 3.9, "3.10", "3.11"] | |
18 | 18 | os: [ubuntu-latest] |
19 | 19 | extras: [false] |
20 | 20 | include: |
24 | 24 | |
25 | 25 | steps: |
26 | 26 | - name: Checkout code |
27 | uses: actions/checkout@v2 | |
27 | uses: actions/checkout@v3 | |
28 | 28 | |
29 | 29 | - name: Set up Python ${{ matrix.python-version }} |
30 | uses: actions/setup-python@v2 | |
30 | uses: actions/setup-python@v4 | |
31 | 31 | with: |
32 | 32 | python-version: ${{ matrix.python-version }} |
33 | 33 | |
57 | 57 | run: coverage xml |
58 | 58 | |
59 | 59 | - name: Upload to CodeCov |
60 | uses: codecov/codecov-action@v1 | |
60 | uses: codecov/codecov-action@v3 | |
61 | ||
62 | test-bsd: | |
63 | name: Test on FreeBSD | |
64 | runs-on: macos-12 | |
65 | ||
66 | steps: | |
67 | - name: Checkout code | |
68 | uses: actions/checkout@v3 | |
69 | ||
70 | - name: Install and Run Tests | |
71 | uses: vmactions/freebsd-vm@v0 | |
72 | with: | |
73 | prepare: | | |
74 | pkg install -y python3 | |
75 | ||
76 | run: | | |
77 | python3 -m venv .venv | |
78 | source .venv/bin/activate.csh | |
79 | pip install --upgrade pip | |
80 | pip install pytest pytest-mock hypothesis | |
81 | python -m pytest --hypothesis-profile=slow-tests |
0 | 0 | Unreleased |
1 | 1 | --- |
2 | ||
3 | [8.3.1] - 2023-03-01 | |
4 | --- | |
5 | ||
6 | ### Fixed | |
7 | - Broken test on FreeBSD due to a broken `locale.strxfrm`. | |
8 | **This change has no effect outside fixing tests** | |
9 | (Issue [#161](https://github.com/SethMMorton/natsort/issues/161)) | |
10 | ||
11 | [8.3.0] - 2023-02-27 | |
12 | --- | |
13 | ||
14 | ### Added | |
15 | - The `PRESORT` option to the `ns` enum to attain consistent | |
16 | sort order in certain corner cases (Issue | |
17 | [#149](https://github.com/SethMMorton/natsort/issues/149)) | |
18 | - Logic to ensure `None` and NaN are sorted in a consistent order | |
19 | (Issue [#149](https://github.com/SethMMorton/natsort/issues/149)) | |
20 | - Explicit Python 3.11 support | |
21 | ||
22 | ### Changed | |
23 | - Only convert to `str` if necessary in `os_sorted` | |
24 | ([@Dobatymo](https://github.com/Dobatymo), issues | |
25 | [#157](https://github.com/SethMMorton/natsort/issues/157) and | |
26 | [#158](https://github.com/SethMMorton/natsort/issues/158)) | |
27 | - Attempt to use new `fastnumbers` functionality if available | |
28 | - Move non-API documentation to the GitHub wiki | |
29 | ||
30 | ### Removed | |
31 | ||
32 | - Support for EOL Python 3.6 | |
2 | 33 | |
3 | 34 | [8.2.0] - 2022-09-01 |
4 | 35 | --- |
605 | 636 | - Sorting algorithm to support floats (including exponentials) and basic version number support |
606 | 637 | |
607 | 638 | <!---Comparison links--> |
639 | [8.3.1]: https://github.com/SethMMorton/natsort/compare/8.3.0...8.3.1 | |
640 | [8.3.0]: https://github.com/SethMMorton/natsort/compare/8.2.0...8.3.0 | |
608 | 641 | [8.2.0]: https://github.com/SethMMorton/natsort/compare/8.1.0...8.2.0 |
609 | 642 | [8.1.0]: https://github.com/SethMMorton/natsort/compare/8.0.2...8.1.0 |
610 | 643 | [8.0.2]: https://github.com/SethMMorton/natsort/compare/8.0.1...8.0.2 |
6 | 6 | less usable after the contribution and is backwards-compatible (unless there is |
7 | 7 | a good reason not to be). |
8 | 8 | |
9 | Located in the `dev/` folder is development collateral such as formatting and | |
10 | patching scripts. The only development collateral not in the `dev/` | |
11 | folder are those files that are expected to exist in the top-level directory | |
12 | (such as `setup.py`, `tox.ini`, and CI configuration). All of these scripts | |
13 | can either be run with the Python standard library, or have hooks in `tox`. | |
14 | ||
9 | 15 | I do not have strong opinions on how one should contribute, so |
10 | 16 | I have copy/pasted some text verbatim from the |
11 | 17 | [Contributor's Guide](http://docs.python-requests.org/en/latest/dev/contributing/) section of |
12 | [Kenneth Reitz's](http://docs.python-requests.org/en/latest/dev/contributing/) | |
13 | excellent [requests](https://github.com/kennethreitz/requests) library in | |
18 | the [requests](https://github.com/kennethreitz/requests) library in | |
14 | 19 | lieu of coming up with my own. |
15 | 20 | |
16 | 21 | > ### Steps for Submitting Code |
26 | 31 | > - Make your change. |
27 | 32 | > - Run the entire test suite again, confirming that all tests pass including the |
28 | 33 | ones you just added. |
29 | > - Send a GitHub Pull Request to the main repository's master branch. | |
34 | > - Send a GitHub Pull Request to the main repository's main branch. | |
30 | 35 | GitHub Pull Requests are the expected method of code collaboration on this project. |
31 | 36 | |
32 | 37 | > ### Documentation Contributions |
39 | 44 | > When contributing documentation, please do your best to follow the style of the |
40 | 45 | documentation files. This means a soft-limit of 79 characters wide in your text |
41 | 46 | files and a semi-formal, yet friendly and approachable, prose style. |
42 | ||
43 | > When presenting Python code, use single-quoted strings ('hello' instead of "hello"). |
0 | Copyright (c) 2012-2021 Seth M. Morton | |
0 | Copyright (c) 2012-2023 Seth M. Morton | |
1 | 1 | |
2 | 2 | Permission is hereby granted, free of charge, to any person obtaining a copy of |
3 | 3 | this software and associated documentation files (the "Software"), to deal in |
7 | 7 | :target: https://pypi.org/project/natsort/ |
8 | 8 | |
9 | 9 | .. image:: https://img.shields.io/pypi/l/natsort.svg |
10 | :target: https://github.com/SethMMorton/natsort/blob/master/LICENSE | |
10 | :target: https://github.com/SethMMorton/natsort/blob/main/LICENSE | |
11 | 11 | |
12 | 12 | .. image:: https://github.com/SethMMorton/natsort/workflows/Tests/badge.svg |
13 | 13 | :target: https://github.com/SethMMorton/natsort/actions |
14 | 14 | |
15 | .. image:: https://codecov.io/gh/SethMMorton/natsort/branch/master/graph/badge.svg | |
15 | .. image:: https://codecov.io/gh/SethMMorton/natsort/branch/main/graph/badge.svg | |
16 | 16 | :target: https://codecov.io/gh/SethMMorton/natsort |
17 | ||
18 | .. image:: https://img.shields.io/pypi/dw/natsort.svg | |
19 | :target: https://pypi.org/project/natsort/ | |
17 | 20 | |
18 | 21 | Simple yet flexible natural sorting in Python. |
19 | 22 | |
21 | 24 | - Downloads: https://pypi.org/project/natsort/ |
22 | 25 | - Documentation: https://natsort.readthedocs.io/ |
23 | 26 | |
24 | - `Examples and Recipes <https://natsort.readthedocs.io/en/master/examples.html>`_ | |
25 | - `How Does Natsort Work? <https://natsort.readthedocs.io/en/master/howitworks.html>`_ | |
26 | - `API <https://natsort.readthedocs.io/en/master/api.html>`_ | |
27 | - `Examples and Recipes`_ | |
28 | - `How Does Natsort Work?`_ | |
29 | - `API`_ | |
27 | 30 | |
28 | 31 | - `Quick Description`_ |
29 | 32 | - `Quick Examples`_ |
55 | 58 | being sorted in lexicographical order, which sorts numbers like you would |
56 | 59 | letters (i.e. 'b', 'ba', 'c'). |
57 | 60 | |
58 | ``natsort`` provides a function ``natsorted`` that helps sort lists | |
61 | `natsort`_ provides a function `natsorted()`_ that helps sort lists | |
59 | 62 | "naturally" ("naturally" is rather ill-defined, but in general it means |
60 | 63 | sorting based on meaning and not computer code point). |
61 | Using ``natsorted`` is simple: | |
64 | Using `natsorted()`_ is simple: | |
62 | 65 | |
63 | 66 | .. code-block:: pycon |
64 | 67 | |
67 | 70 | >>> natsorted(a) |
68 | 71 | ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] |
69 | 72 | |
70 | ``natsorted`` identifies numbers anywhere in a string and sorts them | |
71 | naturally. Below are some other things you can do with ``natsort`` | |
72 | (also see the `examples <https://natsort.readthedocs.io/en/master/examples.html>`_ | |
73 | for a quick start guide, or the | |
74 | `api <https://natsort.readthedocs.io/en/master/api.html>`_ for complete details). | |
75 | ||
76 | **Note**: ``natsorted`` is designed to be a drop-in replacement for the | |
77 | built-in ``sorted`` function. Like ``sorted``, ``natsorted`` | |
73 | `natsorted()`_ identifies numbers anywhere in a string and sorts them | |
74 | naturally. Below are some other things you can do with `natsort`_ | |
75 | (also see the `Examples and Recipes`_ for a quick start guide, or the | |
76 | `API`_ for complete details). | |
77 | ||
78 | **Note**: `natsorted()`_ is designed to be a drop-in replacement for the | |
79 | built-in `sorted()`_ function. Like `sorted()`_, `natsorted()`_ | |
78 | 80 | `does not sort in-place`. To sort a list and assign the output to the same |
79 | 81 | variable, you must explicitly assign the output to a variable: |
80 | 82 | |
108 | 110 | Sorting Versions |
109 | 111 | ++++++++++++++++ |
110 | 112 | |
111 | ``natsort`` does not actually *comprehend* version numbers. | |
113 | `natsort`_ does not actually *comprehend* version numbers. | |
112 | 114 | It just so happens that the most common versioning schemes are designed to |
113 | 115 | work with standard natural sorting techniques; these schemes include |
114 | 116 | ``MAJOR.MINOR``, ``MAJOR.MINOR.PATCH``, ``YEAR.MONTH.DAY``. If your data |
115 | 117 | conforms to a scheme like this, then it will work out-of-the-box with |
116 | ``natsorted`` (as of ``natsort`` version >= 4.0.0): | |
118 | `natsorted()`_ (as of `natsort`_ version >= 4.0.0): | |
117 | 119 | |
118 | 120 | .. code-block:: pycon |
119 | 121 | |
122 | 124 | ['version-1.9', 'version-1.10', 'version-1.11', 'version-2.0'] |
123 | 125 | |
124 | 126 | If you need to sort versions that use a more complicated scheme, please see |
125 | `these examples <https://natsort.readthedocs.io/en/master/examples.html#rc-sorting>`_. | |
127 | `these version sorting examples`_. | |
126 | 128 | |
127 | 129 | Sort Paths Like My File Browser (e.g. Windows Explorer on Windows) |
128 | 130 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
129 | 131 | |
130 | Prior to ``natsort`` version 7.1.0, it was a common request to be able to | |
131 | sort paths like Windows Explorer. As of ``natsort`` 7.1.0, the function | |
132 | ``os_sorted`` has been added to provide users the ability to sort | |
132 | Prior to `natsort`_ version 7.1.0, it was a common request to be able to | |
133 | sort paths like Windows Explorer. As of `natsort`_ 7.1.0, the function | |
134 | `os_sorted()`_ has been added to provide users the ability to sort | |
133 | 135 | in the order that their file browser might sort (e.g. Windows Explorer on |
134 | 136 | Windows, Finder on MacOS, Dolphin/Nautilus/Thunar/etc. on Linux). |
135 | 137 | |
143 | 145 | Output will be different depending on the operating system you are on. |
144 | 146 | |
145 | 147 | For users **not** on Windows (e.g. MacOS/Linux) it is **strongly** recommended |
146 | to also install `PyICU <https://pypi.org/project/PyICU>`_, which will help | |
147 | ``natsort`` give results that match most file browsers. If this is not installed, | |
148 | it will fall back on Python's built-in ``locale`` module and will give good | |
148 | to also install `PyICU`_, which will help | |
149 | `natsort`_ give results that match most file browsers. If this is not installed, | |
150 | it will fall back on Python's built-in `locale`_ module and will give good | |
149 | 151 | results for most input, but will give poor results for special characters. |
150 | 152 | |
151 | 153 | Sorting by Real Numbers (i.e. Signed Floats) |
152 | 154 | ++++++++++++++++++++++++++++++++++++++++++++ |
153 | 155 | |
154 | This is useful in scientific data analysis (and was | |
155 | the default behavior of ``natsorted`` for ``natsort`` | |
156 | version < 4.0.0). Use the ``realsorted`` function: | |
156 | This is useful in scientific data analysis (and was the default behavior | |
157 | of `natsorted()`_ for `natsort`_ version < 4.0.0). Use the `realsorted()`_ | |
158 | function: | |
157 | 159 | |
158 | 160 | .. code-block:: pycon |
159 | 161 | |
174 | 176 | This is where the non-numeric characters are also ordered based on their |
175 | 177 | meaning, not on their ordinal value, and a locale-dependent thousands |
176 | 178 | separator and decimal separator is accounted for in the number. |
177 | This can be achieved with the ``humansorted`` function: | |
179 | This can be achieved with the `humansorted()`_ function: | |
178 | 180 | |
179 | 181 | .. code-block:: pycon |
180 | 182 | |
191 | 193 | ['apple15', 'apple14,689', 'Apple', 'banana', 'Banana'] |
192 | 194 | |
193 | 195 | You may find you need to explicitly set the locale to get this to work |
194 | (as shown in the example). | |
195 | Please see `locale issues <https://natsort.readthedocs.io/en/master/locale_issues.html>`_ and the | |
196 | `Optional Dependencies`_ section below before using the ``humansorted`` function. | |
196 | (as shown in the example). Please see `locale issues`_ and the | |
197 | `Optional Dependencies`_ section below before using the `humansorted()`_ function. | |
197 | 198 | |
198 | 199 | Further Customizing Natsort |
199 | 200 | +++++++++++++++++++++++++++ |
217 | 218 | True |
218 | 219 | |
219 | 220 | All of the available customizations can be found in the documentation for |
220 | `the ns enum <https://natsort.readthedocs.io/en/master/api.html#natsort.ns>`_. | |
221 | `the ns enum`_. | |
221 | 222 | |
222 | 223 | You can also add your own custom transformation functions with the ``key`` |
223 | 224 | argument. These can be used with ``alg`` if you wish. |
231 | 232 | Sorting Mixed Types |
232 | 233 | +++++++++++++++++++ |
233 | 234 | |
234 | You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types | |
235 | when you sort: | |
235 | You can mix and match `int`_, `float`_, and `str`_ types when you sort: | |
236 | 236 | |
237 | 237 | .. code-block:: pycon |
238 | 238 | |
244 | 244 | Handling Bytes |
245 | 245 | ++++++++++++++ |
246 | 246 | |
247 | ``natsort`` does not officially support the `bytes` type, but | |
248 | convenience functions are provided that help you decode to `str` first: | |
247 | `natsort`_ does not officially support the `bytes`_ type, but | |
248 | convenience functions are provided that help you decode to `str`_ first: | |
249 | 249 | |
250 | 250 | .. code-block:: pycon |
251 | 251 | |
262 | 262 | Generating a Reusable Sorting Key and Sorting In-Place |
263 | 263 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
264 | 264 | |
265 | Under the hood, ``natsorted`` works by generating a custom sorting | |
266 | key using ``natsort_keygen`` and then passes that to the built-in | |
267 | ``sorted``. You can use the ``natsort_keygen`` function yourself to | |
268 | generate a custom sorting key to sort in-place using the ``list.sort`` | |
265 | Under the hood, `natsorted()`_ works by generating a custom sorting | |
266 | key using `natsort_keygen()`_ and then passes that to the built-in | |
267 | `sorted()`_. You can use the `natsort_keygen()`_ function yourself to | |
268 | generate a custom sorting key to sort in-place using the `list.sort()`_ | |
269 | 269 | method. |
270 | 270 | |
271 | 271 | .. code-block:: pycon |
281 | 281 | |
282 | 282 | All of the algorithm customizations mentioned in the |
283 | 283 | `Further Customizing Natsort`_ section can also be applied to |
284 | ``natsort_keygen`` through the *alg* keyword option. | |
284 | `natsort_keygen()`_ through the *alg* keyword option. | |
285 | 285 | |
286 | 286 | Other Useful Things |
287 | 287 | +++++++++++++++++++ |
288 | 288 | |
289 | 289 | - recursively descend into lists of lists |
290 | 290 | - automatic unicode normalization of input data |
291 | - `controlling the case-sensitivity <https://natsort.readthedocs.io/en/master/examples.html#case-sort>`_ | |
292 | - `sorting file paths correctly <https://natsort.readthedocs.io/en/master/examples.html#path-sort>`_ | |
293 | - `allow custom sorting keys <https://natsort.readthedocs.io/en/master/examples.html#custom-sort>`_ | |
294 | - `accounting for units <https://natsort.readthedocs.io/en/master/examples.html#accounting-for-units-when-sorting>`_ | |
291 | - `controlling the case-sensitivity`_ | |
292 | - `sorting file paths correctly`_ | |
293 | - `allow custom sorting keys`_ | |
294 | - `accounting for units`_ | |
295 | 295 | |
296 | 296 | FAQ |
297 | 297 | --- |
298 | 298 | |
299 | How do I debug ``natsort.natsorted()``? | |
300 | The best way to debug ``natsorted()`` is to generate a key using ``natsort_keygen()`` | |
301 | with the same options being passed to ``natsorted``. One can take a look at | |
299 | How do I debug `natsorted()`_? | |
300 | The best way to debug `natsorted()`_ is to generate a key using `natsort_keygen()`_ | |
301 | with the same options being passed to `natsorted()`_. One can take a look at | |
302 | 302 | exactly what is being done with their input using this key - it is highly |
303 | recommended | |
304 | to `look at this issue describing how to debug <https://github.com/SethMMorton/natsort/issues/13#issuecomment-50422375>`_ | |
305 | for *how* to debug, and also to review the | |
306 | `How Does Natsort Work? <https://natsort.readthedocs.io/en/master/howitworks.html>`_ | |
307 | page for *why* ``natsort`` is doing that to your data. | |
303 | recommended to `look at this issue describing how to debug`_ for *how* to debug, | |
304 | and also to review the `How Does Natsort Work?`_ page for *why* `natsort`_ is | |
305 | doing that to your data. | |
308 | 306 | |
309 | 307 | If you are trying to sort custom classes and running into trouble, please |
310 | 308 | take a look at https://github.com/SethMMorton/natsort/issues/60. In short, |
311 | 309 | custom classes are not likely to be sorted correctly if one relies |
312 | 310 | on the behavior of ``__lt__`` and the other rich comparison operators in |
313 | 311 | their custom class - it is better to use a ``key`` function with |
314 | ``natsort``, or use the ``natsort`` key as part of your rich comparison | |
312 | `natsort`_, or use the `natsort`_ key as part of your rich comparison | |
315 | 313 | operator definition. |
316 | 314 | |
317 | ``natsort`` gave me results I didn't expect, and it's a terrible library! | |
315 | `natsort`_ gave me results I didn't expect, and it's a terrible library! | |
318 | 316 | Did you try to debug using the above advice? If so, and you still cannot figure out |
319 | the error, then please `file an issue <https://github.com/SethMMorton/natsort/issues/new>`_. | |
320 | ||
321 | How *does* ``natsort`` work? | |
322 | If you don't want to read `How Does Natsort Work? <https://natsort.readthedocs.io/en/master/howitworks.html>`_, | |
317 | the error, then please `file an issue`_. | |
318 | ||
319 | How *does* `natsort`_ work? | |
320 | If you don't want to read `How Does Natsort Work?`_, | |
323 | 321 | here is a quick primer. |
324 | 322 | |
325 | ``natsort`` provides a `key function <https://docs.python.org/3/howto/sorting.html#key-functions>`_ | |
326 | that can be passed to `list.sort() <https://docs.python.org/3/library/stdtypes.html#list.sort>`_ | |
327 | or `sorted() <https://docs.python.org/3/library/functions.html#sorted>`_ in order to | |
328 | modify the default sorting behavior. This key is generated on-demand with | |
329 | the key generator ``natsort.natsort_keygen()``. ``natsort.natsorted()`` | |
330 | is essentially a wrapper for the following code: | |
323 | `natsort`_ provides a `key function`_ that can be passed to `list.sort()`_ | |
324 | or `sorted()`_ in order to modify the default sorting behavior. This key | |
325 | is generated on-demand with the key generator `natsort_keygen()`_. | |
326 | `natsorted()`_ is essentially a wrapper for the following code: | |
331 | 327 | |
332 | 328 | .. code-block:: pycon |
333 | 329 | |
336 | 332 | >>> sorted(['1', '10', '2'], key=natsort_key) |
337 | 333 | ['1', '2', '10'] |
338 | 334 | |
339 | Users can further customize ``natsort`` sorting behavior with the ``key`` | |
335 | Users can further customize `natsort`_ sorting behavior with the ``key`` | |
340 | 336 | and/or ``alg`` options (see details in the `Further Customizing Natsort`_ |
341 | 337 | section). |
342 | 338 | |
343 | The key generated by ``natsort_keygen`` *always* returns a ``tuple``. It | |
339 | The key generated by `natsort_keygen()`_ *always* returns a `tuple`_. It | |
344 | 340 | does so in the following way (*some details omitted for clarity*): |
345 | 341 | |
346 | 342 | 1. Assume the input is a string, and attempt to split it into numbers and |
347 | 343 | non-numbers using regular expressions. Numbers are then converted into |
348 | either ``int`` or ``float``. | |
344 | either `int`_ or `float`_. | |
349 | 345 | 2. If the above fails because the input is not a string, assume the input |
350 | is some other sequence (e.g. ``list`` or ``tuple``), and recursively | |
346 | is some other sequence (e.g. `list`_ or `tuple`_), and recursively | |
351 | 347 | apply the key to each element of the sequence. |
352 | 348 | 3. If the above fails because the input is not iterable, assume the input |
353 | is an ``int`` or ``float``, and just return the input in a ``tuple``. | |
354 | ||
355 | Because a ``tuple`` is always returned, a ``TypeError`` should not be common | |
356 | unless one tries to do something odd like sort an ``int`` against a ``list``. | |
349 | is an `int`_ or `float`_, and just return the input in a `tuple`_. | |
350 | ||
351 | Because a `tuple`_ is always returned, a `TypeError`_ should not be common | |
352 | unless one tries to do something odd like sort an `int`_ against a `list`_. | |
357 | 353 | |
358 | 354 | Shell script |
359 | 355 | ------------ |
360 | 356 | |
361 | ``natsort`` comes with a shell script called ``natsort``, or can also be called | |
362 | from the command line with ``python -m natsort``. | |
357 | `natsort`_ comes with a shell script called `natsort`_, or can also be called | |
358 | from the command line with ``python -m natsort``. Check out the | |
359 | `shell script wiki documentation`_ for more details. | |
363 | 360 | |
364 | 361 | Requirements |
365 | 362 | ------------ |
366 | 363 | |
367 | ``natsort`` requires Python 3.6 or greater. | |
364 | `natsort`_ requires Python 3.7 or greater. | |
368 | 365 | |
369 | 366 | Optional Dependencies |
370 | 367 | --------------------- |
373 | 370 | +++++++++++ |
374 | 371 | |
375 | 372 | The most efficient sorting can occur if you install the |
376 | `fastnumbers <https://pypi.org/project/fastnumbers>`_ package | |
373 | `fastnumbers`_ package | |
377 | 374 | (version >=2.0.0); it helps with the string to number conversions. |
378 | ``natsort`` will still run (efficiently) without the package, but if you need | |
375 | `natsort`_ will still run (efficiently) without the package, but if you need | |
379 | 376 | to squeeze out that extra juice it is recommended you include this as a |
380 | dependency. ``natsort`` will not require (or check) that | |
381 | `fastnumbers <https://pypi.org/project/fastnumbers>`_ is installed | |
382 | at installation. | |
377 | dependency. `natsort`_ will not require (or check) that | |
378 | `fastnumbers`_ is installed at installation. | |
383 | 379 | |
384 | 380 | PyICU |
385 | 381 | +++++ |
386 | 382 | |
387 | It is recommended that you install `PyICU <https://pypi.org/project/PyICU>`_ | |
388 | if you wish to sort in a locale-dependent manner, see | |
389 | https://natsort.readthedocs.io/en/master/locale_issues.html for an explanation why. | |
383 | It is recommended that you install `PyICU`_ if you wish to sort in a | |
384 | locale-dependent manner, see this page on `locale issues`_ for an explanation why. | |
390 | 385 | |
391 | 386 | Installation |
392 | 387 | ------------ |
398 | 393 | $ pip install natsort |
399 | 394 | |
400 | 395 | If you want to install the `Optional Dependencies`_, you can use the |
401 | `"extras" notation <https://packaging.python.org/tutorials/installing-packages/#installing-setuptools-extras>`_ | |
402 | at installation time to install those dependencies as well - use ``fast`` for | |
403 | `fastnumbers <https://pypi.org/project/fastnumbers>`_ and ``icu`` for | |
404 | `PyICU <https://pypi.org/project/PyICU>`_. | |
396 | `"extras" notation`_ at installation time to install those dependencies as | |
397 | well - use ``fast`` for `fastnumbers`_ and ``icu`` for `PyICU`_. | |
405 | 398 | |
406 | 399 | .. code-block:: console |
407 | 400 | |
413 | 406 | How to Run Tests |
414 | 407 | ---------------- |
415 | 408 | |
416 | Please note that ``natsort`` is NOT set-up to support ``python setup.py test``. | |
417 | ||
418 | The recommended way to run tests is with `tox <https://tox.readthedocs.io/en/latest/>`_. | |
419 | After installing ``tox``, running tests is as simple as executing the following | |
420 | in the ``natsort`` directory: | |
409 | Please note that `natsort`_ is NOT set-up to support ``python setup.py test``. | |
410 | ||
411 | The recommended way to run tests is with `tox`_. After installing ``tox``, | |
412 | running tests is as simple as executing the following in the `natsort`_ directory: | |
421 | 413 | |
422 | 414 | .. code-block:: console |
423 | 415 | |
432 | 424 | How to Build Documentation |
433 | 425 | -------------------------- |
434 | 426 | |
435 | If you want to build the documentation for ``natsort``, it is recommended to | |
427 | If you want to build the documentation for `natsort`_, it is recommended to | |
436 | 428 | use ``tox``: |
437 | 429 | |
438 | 430 | .. code-block:: console |
444 | 436 | Dropped Deprecated APIs |
445 | 437 | ----------------------- |
446 | 438 | |
447 | In ``natsort`` version 6.0.0, the following APIs and functions were removed | |
439 | In `natsort`_ version 6.0.0, the following APIs and functions were removed | |
448 | 440 | |
449 | 441 | - ``number_type`` keyword argument (deprecated since 3.4.0) |
450 | 442 | - ``signed`` keyword argument (deprecated since 3.4.0) |
466 | 458 | |
467 | 459 | $ python -Wdefault::DeprecationWarning my-code.py |
468 | 460 | |
469 | By default ``DeprecationWarnings`` are not shown, but this will cause them | |
461 | By default `DeprecationWarnings`_ are not shown, but this will cause them | |
470 | 462 | to be shown. Alternatively, you can just set the environment variable |
471 | 463 | ``PYTHONWARNINGS`` to "default::DeprecationWarning" and then run your code. |
472 | 464 | |
478 | 470 | History |
479 | 471 | ------- |
480 | 472 | |
481 | Please visit the changelog | |
482 | `on GitHub <https://github.com/SethMMorton/natsort/blob/master/CHANGELOG.md>`_ or | |
483 | `in the documentation <https://natsort.readthedocs.io/en/master/changelog.html>`_. | |
473 | Please visit the changelog `on GitHub`_ or `in the documentation`_. | |
474 | ||
475 | .. _natsort: https://natsort.readthedocs.io/en/stable/index.html | |
476 | .. _natsorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.natsorted | |
477 | .. _natsort_keygen(): https://natsort.readthedocs.io/en/stable/api.html#natsort.natsort_keygen | |
478 | .. _realsorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.realsorted | |
479 | .. _humansorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.humansorted | |
480 | .. _os_sorted(): https://natsort.readthedocs.io/en/stable/api.html#natsort.os_sorted | |
481 | .. _the ns enum: https://natsort.readthedocs.io/en/stable/api.html#natsort.ns | |
482 | .. _fastnumbers: https://github.com/SethMMorton/fastnumbers | |
483 | .. _sorted(): https://docs.python.org/3/library/functions.html#sorted | |
484 | .. _list.sort(): https://docs.python.org/3/library/stdtypes.html#list.sort | |
485 | .. _key function: https://docs.python.org/3/howto/sorting.html#key-functions | |
486 | .. _locale: https://docs.python.org/3/library/locale.html | |
487 | .. _int: https://docs.python.org/3/library/functions.html#int | |
488 | .. _float: https://docs.python.org/3/library/functions.html#float | |
489 | .. _str: https://docs.python.org/3/library/stdtypes.html#str | |
490 | .. _bytes: https://docs.python.org/3/library/stdtypes.html#bytes | |
491 | .. _list: https://docs.python.org/3/library/stdtypes.html#list | |
492 | .. _tuple: https://docs.python.org/3/library/stdtypes.html#tuple | |
493 | .. _TypeError: https://docs.python.org/3/library/exceptions.html#TypeError | |
494 | .. _DeprecationWarnings: https://docs.python.org/3/library/exceptions.html#DeprecationWarning | |
495 | .. _"extras" notation: https://packaging.python.org/tutorials/installing-packages/#installing-setuptools-extras | |
496 | .. _PyICU: https://pypi.org/project/PyICU | |
497 | .. _tox: https://tox.readthedocs.io/en/latest/ | |
498 | .. _Examples and Recipes: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes | |
499 | .. _How Does Natsort Work?: https://github.com/SethMMorton/natsort/wiki/How-Does-Natsort-Work%3F | |
500 | .. _API: https://natsort.readthedocs.io/en/stable/api.html | |
501 | .. _on GitHub: https://github.com/SethMMorton/natsort/blob/main/CHANGELOG.md | |
502 | .. _in the documentation: https://natsort.readthedocs.io/en/stable/changelog.html | |
503 | .. _file an issue: https://github.com/SethMMorton/natsort/issues/new | |
504 | .. _look at this issue describing how to debug: https://github.com/SethMMorton/natsort/issues/13#issuecomment-50422375 | |
505 | .. _controlling the case-sensitivity: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#controlling-case-when-sorting | |
506 | .. _sorting file paths correctly: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#sort-os-generated-paths | |
507 | .. _allow custom sorting keys: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#using-a-custom-sorting-key | |
508 | .. _accounting for units: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#accounting-for-units-when-sorting | |
509 | .. _these version sorting examples: https://github.com/SethMMorton/natsort/wiki/Examples-and-Recipes#sorting-more-expressive-versioning-schemes | |
510 | .. _locale issues: https://github.com/SethMMorton/natsort/wiki/Possible-Issues-with-natsort.humansorted-or-ns.LOCALE | |
511 | .. _shell script wiki documentation: https://github.com/SethMMorton/natsort/wiki/Shell-Script⏎ |
0 | 0 | # Release Checklist |
1 | 1 | |
2 | - [ ] Get master to the appropriate code release state. | |
3 | [Travis CI](https://travis-ci.com/SethMMorton/natsort) must be passing: | |
4 | [![Build Status](https://travis-ci.com/SethMMorton/natsort.svg?branch=master)](https://travis-ci.com/SethMMorton/natsort) | |
2 | - [ ] Get main to the appropriate code release state. | |
3 | [GitHub Actions](https://github.com/SethMMorton/natsort/actions) must be passing: | |
4 | [![Build Status](https://github.com/SethMMorton/natsort/workflows/Tests/badge.svg)](https://github.com/SethMMorton/natsort/actions) | |
5 | 5 | |
6 | 6 | - [ ] Ensure that the `CHANGELOG.md` includes the changes made since last release. |
7 | 7 | Please follow the style outlined in https://keepachangelog.com/. |
22 | 22 | git push |
23 | 23 | ``` |
24 | 24 | |
25 | - [ ] Check that the [Travis CI build](https://travis-ci.com/SethMMorton/natsort) has | |
26 | deployed correctly to [the test PyPI](https://test.pypi.org/project/natsort/#history). | |
27 | ||
28 | 25 | - [ ] Push the tag: |
29 | 26 | |
30 | 27 | ```bash |
31 | 28 | git push --tags |
32 | 29 | ``` |
33 | 30 | |
34 | - [ ] Check that the tagged [Travis CI build](https://travis-ci.com/SethMMorton/natsort) has | |
31 | - [ ] Check that the tagged [GitHub Actions build](https://github.com/SethMMorton/natsort/actions) has | |
35 | 32 | deployed correctly to [PyPI](https://pypi.org/project/natsort/#history). |
36 | 33 | |
37 | 34 | - [ ] Check installation: |
58 | 58 | # built documents. |
59 | 59 | # |
60 | 60 | # The full version, including alpha/beta/rc tags. |
61 | release = "8.2.0" | |
61 | release = "8.3.1" | |
62 | 62 | # The short X.Y version. |
63 | 63 | version = ".".join(release.split(".")[0:2]) |
64 | 64 |
5 | 5 | Examples and Recipes |
6 | 6 | ==================== |
7 | 7 | |
8 | If you want more detailed examples than given on this page, please see | |
9 | https://github.com/SethMMorton/natsort/tree/master/tests. | |
10 | ||
11 | .. contents:: | |
12 | :local: | |
13 | ||
14 | Basic Usage | |
15 | ----------- | |
16 | ||
17 | In the most basic use case, simply import :func:`~natsorted` and use | |
18 | it as you would :func:`sorted`: | |
19 | ||
20 | .. code-block:: pycon | |
21 | ||
22 | >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] | |
23 | >>> sorted(a) | |
24 | ['1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '2 ft 7 in', '7 ft 6 in'] | |
25 | >>> from natsort import natsorted, ns | |
26 | >>> natsorted(a) | |
27 | ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] | |
28 | ||
29 | Sort Version Numbers | |
30 | -------------------- | |
31 | ||
32 | As of :mod:`natsort` version >= 4.0.0, :func:`~natsorted` will work for | |
33 | well-behaved version numbers, like ``MAJOR.MINOR.PATCH``. | |
34 | ||
35 | .. _rc_sorting: | |
36 | ||
37 | Sorting More Expressive Versioning Schemes | |
38 | ++++++++++++++++++++++++++++++++++++++++++ | |
39 | ||
40 | By default, if you wish to sort versions that are not as simple as | |
41 | ``MAJOR.MINOR.PATCH`` (or similar), you may not get the results you expect: | |
42 | ||
43 | .. code-block:: pycon | |
44 | ||
45 | >>> a = ['1.2', '1.2rc1', '1.2beta2', '1.2beta1', '1.2alpha', '1.2.1', '1.1', '1.3'] | |
46 | >>> natsorted(a) | |
47 | ['1.1', '1.2', '1.2.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.3'] | |
48 | ||
49 | To make the '1.2' pre-releases come before '1.2.1', you need to use the | |
50 | following recipe: | |
51 | ||
52 | .. code-block:: pycon | |
53 | ||
54 | >>> natsorted(a, key=lambda x: x.replace('.', '~')) | |
55 | ['1.1', '1.2', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2.1', '1.3'] | |
56 | ||
57 | If you also want '1.2' after all the alpha, beta, and rc candidates, you can | |
58 | modify the above recipe: | |
59 | ||
60 | .. code-block:: pycon | |
61 | ||
62 | >>> natsorted(a, key=lambda x: x.replace('.', '~')+'z') | |
63 | ['1.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2', '1.2.1', '1.3'] | |
64 | ||
65 | Please see `this issue <https://github.com/SethMMorton/natsort/issues/13>`_ to | |
66 | see why this works. | |
67 | ||
68 | Sorting Rigorously Defined Versioning Schemes (e.g. SemVer or PEP 440) | |
69 | """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" | |
70 | ||
71 | If you know you are using a versioning scheme that follows a well-defined format | |
72 | for which there is third-party module support, you should use those modules | |
73 | to assist in sorting. Some examples might be | |
74 | `PEP 440 <https://packaging.pypa.io/en/latest/version>`_ or | |
75 | `SemVer <https://python-semver.readthedocs.io/en/latest/api.html>`_. | |
76 | ||
77 | If we are being honest, using these methods to parse a version means you don't | |
78 | need to use :mod:`natsort` - you should probably just use :func:`sorted` | |
79 | directly. Here's an example with SemVer: | |
80 | ||
81 | .. code-block:: pycon | |
82 | ||
83 | >>> from semver import VersionInfo | |
84 | >>> a = ['3.4.5-pre.1', '3.4.5', '3.4.5-pre.2+build.4'] | |
85 | >>> sorted(a, key=VersionInfo.parse) | |
86 | ['3.4.5-pre.1', '3.4.5-pre.2+build.4', '3.4.5'] | |
87 | ||
88 | .. _path_sort: | |
89 | ||
90 | Sort OS-Generated Paths | |
91 | ----------------------- | |
92 | ||
93 | In some cases when sorting file paths with OS-Generated names, the default | |
94 | :func:`~natsorted` algorithm may not be sufficient. In cases like these, | |
95 | you may need to use the ``ns.PATH`` option: | |
96 | ||
97 | .. code-block:: pycon | |
98 | ||
99 | >>> a = ['./folder/file (1).txt', | |
100 | ... './folder/file.txt', | |
101 | ... './folder (1)/file.txt', | |
102 | ... './folder (10)/file.txt'] | |
103 | >>> natsorted(a) | |
104 | ['./folder (1)/file.txt', './folder (10)/file.txt', './folder/file (1).txt', './folder/file.txt'] | |
105 | >>> natsorted(a, alg=ns.PATH) | |
106 | ['./folder/file.txt', './folder/file (1).txt', './folder (1)/file.txt', './folder (10)/file.txt'] | |
107 | ||
108 | Locale-Aware Sorting (Human Sorting) | |
109 | ------------------------------------ | |
110 | ||
111 | .. note:: | |
112 | Please read :ref:`locale_issues` before using ``ns.LOCALE``, :func:`humansorted`, | |
113 | or :func:`index_humansorted`. | |
114 | ||
115 | You can instruct :mod:`natsort` to use locale-aware sorting with the | |
116 | ``ns.LOCALE`` option. In addition to making this understand non-ASCII | |
117 | characters, it will also properly interpret non-'.' decimal separators | |
118 | and also properly order case. It may be more convenient to just use | |
119 | the :func:`humansorted` function: | |
120 | ||
121 | .. code-block:: pycon | |
122 | ||
123 | >>> from natsort import humansorted | |
124 | >>> import locale | |
125 | >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') | |
126 | 'en_US.UTF-8' | |
127 | >>> a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
128 | >>> natsorted(a, alg=ns.LOCALE) | |
129 | ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] | |
130 | >>> humansorted(a) | |
131 | ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] | |
132 | ||
133 | You may find that if you do not explicitly set the locale your results may not | |
134 | be as you expect... I have found that it depends on the system you are on. | |
135 | If you use `PyICU <https://pypi.org/project/PyICU>`_ (see below) then | |
136 | you should not need to do this. | |
137 | ||
138 | .. _case_sort: | |
139 | ||
140 | Controlling Case When Sorting | |
141 | ----------------------------- | |
142 | ||
143 | For non-numbers, by default :mod:`natsort` uses ordinal sorting (i.e. | |
144 | it sorts by the character's value in the ASCII table). For example: | |
145 | ||
146 | .. code-block:: pycon | |
147 | ||
148 | >>> a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
149 | >>> natsorted(a) | |
150 | ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn'] | |
151 | ||
152 | There are times when you wish to ignore the case when sorting, | |
153 | you can easily do this with the ``ns.IGNORECASE`` option: | |
154 | ||
155 | .. code-block:: pycon | |
156 | ||
157 | >>> natsorted(a, alg=ns.IGNORECASE) | |
158 | ['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn'] | |
159 | ||
160 | Note that since Python's sorting is stable, the order of equivalent | |
161 | elements after lowering the case is the same order they appear in the | |
162 | original list. | |
163 | ||
164 | Upper-case letters appear first in the ASCII table, but many natural | |
165 | sorting methods place lower-case first. To do this, use | |
166 | ``ns.LOWERCASEFIRST``: | |
167 | ||
168 | .. code-block:: pycon | |
169 | ||
170 | >>> natsorted(a, alg=ns.LOWERCASEFIRST) | |
171 | ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn'] | |
172 | ||
173 | It may be undesirable to have the upper-case letters grouped together | |
174 | and the lower-case letters grouped together; most would expect all | |
175 | "a"s to be together regardless of case, and all "b"s, and so on. To | |
176 | achieve this, use ``ns.GROUPLETTERS``: | |
177 | ||
178 | .. code-block:: pycon | |
179 | ||
180 | >>> natsorted(a, alg=ns.GROUPLETTERS) | |
181 | ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn'] | |
182 | ||
183 | You might combine this with ``ns.LOWERCASEFIRST`` to get what most | |
184 | would expect to be "natural" sorting: | |
185 | ||
186 | .. code-block:: pycon | |
187 | ||
188 | >>> natsorted(a, alg=ns.G | ns.LF) | |
189 | ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] | |
190 | ||
191 | Customizing Float Definition | |
192 | ---------------------------- | |
193 | ||
194 | You can make :func:`~natsorted` search for any float that would be | |
195 | a valid Python float literal, such as 5, 0.4, -4.78, +4.2E-34, etc. | |
196 | using the ``ns.FLOAT`` key. You can disable the exponential component | |
197 | of the number with ``ns.NOEXP``. | |
198 | ||
199 | .. code-block:: pycon | |
200 | ||
201 | >>> a = ['a50', 'a51.', 'a+50.4', 'a5.034e1', 'a+50.300'] | |
202 | >>> natsorted(a, alg=ns.FLOAT) | |
203 | ['a50', 'a5.034e1', 'a51.', 'a+50.300', 'a+50.4'] | |
204 | >>> natsorted(a, alg=ns.FLOAT | ns.SIGNED) | |
205 | ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] | |
206 | >>> natsorted(a, alg=ns.FLOAT | ns.SIGNED | ns.NOEXP) | |
207 | ['a5.034e1', 'a50', 'a+50.300', 'a+50.4', 'a51.'] | |
208 | ||
209 | For convenience, the ``ns.REAL`` option is provided which is a shortcut | |
210 | for ``ns.FLOAT | ns.SIGNED`` and can be used to sort on real numbers. | |
211 | This can be easily accessed with the :func:`~realsorted` convenience | |
212 | function. Please note that the behavior of the :func:`~realsorted` function | |
213 | was the default behavior of :func:`~natsorted` for :mod:`natsort` | |
214 | version < 4.0.0: | |
215 | ||
216 | .. code-block:: pycon | |
217 | ||
218 | >>> natsorted(a, alg=ns.REAL) | |
219 | ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] | |
220 | >>> from natsort import realsorted | |
221 | >>> realsorted(a) | |
222 | ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] | |
223 | ||
224 | .. _custom_sort: | |
225 | ||
226 | Using a Custom Sorting Key | |
227 | -------------------------- | |
228 | ||
229 | Like the built-in ``sorted`` function, ``natsorted`` can accept a custom | |
230 | sort key so that: | |
231 | ||
232 | .. code-block:: pycon | |
233 | ||
234 | >>> from operator import attrgetter, itemgetter | |
235 | >>> a = [['a', 'num4'], ['b', 'num8'], ['c', 'num2']] | |
236 | >>> natsorted(a, key=itemgetter(1)) | |
237 | [['c', 'num2'], ['a', 'num4'], ['b', 'num8']] | |
238 | >>> class Foo: | |
239 | ... def __init__(self, bar): | |
240 | ... self.bar = bar | |
241 | ... def __repr__(self): | |
242 | ... return "Foo('{}')".format(self.bar) | |
243 | >>> b = [Foo('num3'), Foo('num5'), Foo('num2')] | |
244 | >>> natsorted(b, key=attrgetter('bar')) | |
245 | [Foo('num2'), Foo('num3'), Foo('num5')] | |
246 | ||
247 | .. _unit_sorting: | |
248 | ||
249 | Accounting for Units When Sorting | |
250 | +++++++++++++++++++++++++++++++++ | |
251 | ||
252 | :mod:`natsort` does not come with any pre-built mechanism to sort units, | |
253 | but you can write your own `key` to do this. Below, I will demonstrate sorting | |
254 | imperial lengths (e.g. feet and inches), but of course you can extend this to any | |
255 | set of units you need. This example is based on code | |
256 | `from this issue <https://github.com/SethMMorton/natsort/issues/100#issuecomment-530659310>`_, | |
257 | and uses the function :func:`natsort.numeric_regex_chooser` to build a regular | |
258 | expression that will parse numbers in the same manner as :mod:`natsort` itself. | |
259 | ||
260 | .. code-block:: pycon | |
261 | ||
262 | >>> import re | |
263 | >>> import natsort | |
264 | >>> | |
265 | >>> # Define how each unit will be transformed | |
266 | >>> conversion_mapping = { | |
267 | ... "in": 1, | |
268 | ... "inch": 1, | |
269 | ... "inches": 1, | |
270 | ... "ft": 12, | |
271 | ... "feet": 12, | |
272 | ... "foot": 12, | |
273 | ... } | |
274 | >>> | |
275 | >>> # This regular expression searches for numbers and units | |
276 | >>> all_units = "|".join(conversion_mapping.keys()) | |
277 | >>> float_re = natsort.numeric_regex_chooser(natsort.FLOAT | natsort.SIGNED) | |
278 | >>> unit_finder = re.compile(r"({})\s*({})".format(float_re, all_units), re.IGNORECASE) | |
279 | >>> | |
280 | >>> def unit_replacer(matchobj): | |
281 | ... """ | |
282 | ... Given a regex match object, return a replacement string where units are modified | |
283 | ... """ | |
284 | ... number = matchobj.group(1) | |
285 | ... unit = matchobj.group(2) | |
286 | ... new_number = float(number) * conversion_mapping[unit] | |
287 | ... return "{} in".format(new_number) | |
288 | ... | |
289 | >>> # Demo time! | |
290 | >>> data = ['1 ft', '5 in', '10 ft', '2 in'] | |
291 | >>> [unit_finder.sub(unit_replacer, x) for x in data] | |
292 | ['12.0 in', '5.0 in', '120.0 in', '2.0 in'] | |
293 | >>> | |
294 | >>> natsort.natsorted(data, key=lambda x: unit_finder.sub(unit_replacer, x)) | |
295 | ['2 in', '5 in', '1 ft', '10 ft'] | |
296 | ||
297 | Generating a Natsort Key | |
298 | ------------------------ | |
299 | ||
300 | If you need to sort a list in-place, you cannot use :func:`~natsorted`; you | |
301 | need to pass a key to the :meth:`list.sort` method. The function | |
302 | :func:`~natsort_keygen` is a convenient way to generate these keys for you: | |
303 | ||
304 | .. code-block:: pycon | |
305 | ||
306 | >>> from natsort import natsort_keygen | |
307 | >>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300'] | |
308 | >>> natsort_key = natsort_keygen(alg=ns.FLOAT) | |
309 | >>> a.sort(key=natsort_key) | |
310 | >>> a | |
311 | ['a50', 'a50.300', 'a5.034e1', 'a50.4', 'a51.'] | |
312 | ||
313 | :func:`~natsort_keygen` has the same API as :func:`~natsorted` (minus the | |
314 | `reverse` option). | |
315 | ||
316 | Sorting Multiple Lists According to a Single List | |
317 | ------------------------------------------------- | |
318 | ||
319 | Sometimes you have multiple lists, and you want to sort one of those | |
320 | lists and reorder the other lists according to how the first was sorted. | |
321 | To achieve this you could use the :func:`~index_natsorted` in combination | |
322 | with the convenience function | |
323 | :func:`~order_by_index`: | |
324 | ||
325 | .. code-block:: pycon | |
326 | ||
327 | >>> from natsort import index_natsorted, order_by_index | |
328 | >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] | |
329 | >>> b = [4, 5, 6, 7, 8] | |
330 | >>> c = ['hi', 'lo', 'ah', 'do', 'up'] | |
331 | >>> index = index_natsorted(a) | |
332 | >>> order_by_index(a, index) | |
333 | ['a1', 'a2', 'a4', 'a9', 'a10'] | |
334 | >>> order_by_index(b, index) | |
335 | [6, 4, 7, 5, 8] | |
336 | >>> order_by_index(c, index) | |
337 | ['ah', 'hi', 'do', 'lo', 'up'] | |
338 | ||
339 | Returning Results in Reverse Order | |
340 | ---------------------------------- | |
341 | ||
342 | Just like the :func:`sorted` built-in function, you can supply the | |
343 | ``reverse`` option to return the results in reverse order: | |
344 | ||
345 | .. code-block:: pycon | |
346 | ||
347 | >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] | |
348 | >>> natsorted(a, reverse=True) | |
349 | ['a10', 'a9', 'a4', 'a2', 'a1'] | |
350 | ||
351 | Sorting Bytes | |
352 | ------------- | |
353 | ||
354 | Python is rather strict about comparing strings and bytes, and this | |
355 | can make it difficult to deal with collections of both. Because of the | |
356 | challenge of guessing which encoding should be used to decode a bytes | |
357 | array to a string, :mod:`natsort` does *not* try to guess and automatically | |
358 | convert for you; in fact, the official stance of :mod:`natsort` is to | |
359 | not support sorting bytes. Instead, some decoding convenience functions | |
360 | have been provided to you (see :ref:`bytes_help`) that allow you to | |
361 | provide a codec for decoding bytes through the ``key`` argument that | |
362 | will allow :mod:`natsort` to convert byte arrays to strings for sorting; | |
363 | these functions know not to raise an error if the input is not a byte | |
364 | array, so you can use the key on any arbitrary collection of data. | |
365 | ||
366 | .. code-block:: pycon | |
367 | ||
368 | >>> from natsort import as_ascii | |
369 | >>> a = [b'a', 14.0, 'b'] | |
370 | >>> # natsorted(a) would raise a TypeError (bytes() < str()) | |
371 | >>> natsorted(a, key=as_ascii) == [14.0, b'a', 'b'] | |
372 | True | |
373 | ||
374 | Additionally, regular expressions cannot be run on byte arrays, making it | |
375 | so that :mod:`natsort` cannot parse them for numbers. As a result, if you | |
376 | run :mod:`natsort` on a list of bytes, you will get results that are like | |
377 | Python's default sorting behavior. Of course, you can use the decoding | |
378 | functions to solve this: | |
379 | ||
380 | .. code-block:: pycon | |
381 | ||
382 | >>> from natsort import as_utf8 | |
383 | >>> a = [b'a56', b'a5', b'a6', b'a40'] | |
384 | >>> natsorted(a) # doctest: +SKIP | |
385 | [b'a40', b'a5', b'a56', b'a6'] | |
386 | >>> natsorted(a, key=as_utf8) == [b'a5', b'a6', b'a40', b'a56'] | |
387 | True | |
388 | ||
389 | If you need a codec different from ASCII or UTF-8, you can use | |
390 | :func:`decoder` to generate a custom key: | |
391 | ||
392 | .. code-block:: pycon | |
393 | ||
394 | >>> from natsort import decoder | |
395 | >>> a = [b'a56', b'a5', b'a6', b'a40'] | |
396 | >>> natsorted(a, key=decoder('latin1')) == [b'a5', b'a6', b'a40', b'a56'] | |
397 | True | |
398 | ||
399 | Sorting a Pandas DataFrame | |
400 | -------------------------- | |
401 | ||
402 | Starting from Pandas version 1.1.0, the | |
403 | `sorting methods accept a "key" argument <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_values.html>`_, | |
404 | so you can simply pass :func:`natsort_keygen` to the sorting methods and sort: | |
405 | ||
406 | .. code-block:: python | |
407 | ||
408 | import pandas as pd | |
409 | from natsort import natsort_keygen | |
410 | s = pd.Series(['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in']) | |
411 | s.sort_values(key=natsort_keygen()) | |
412 | # 1 1 ft 5 in | |
413 | # 0 2 ft 7 in | |
414 | # 3 2 ft 11 in | |
415 | # 4 7 ft 6 in | |
416 | # 2 10 ft 2 in | |
417 | # dtype: object | |
418 | ||
419 | Similarly, if you need to sort the index there is | |
420 | `sort_index <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_index.html>`_ | |
421 | of a DataFrame. | |
422 | ||
423 | If you are on an older version of Pandas, please check out | |
424 | `this answer on StackOverflow <https://stackoverflow.com/a/29582718/1399279>`_ | |
425 | for ways to do this without the ``key`` argument to ``sort_values``. | |
8 | This page has been moved to the | |
9 | `natsort wiki <https://github.com/SethMMorton/natsort/wiki/How-Does-Natsort-Work%3F>`_. |
5 | 5 | How Does Natsort Work? |
6 | 6 | ====================== |
7 | 7 | |
8 | .. contents:: | |
9 | :local: | |
10 | ||
11 | :mod:`natsort` works by breaking strings into smaller sub-components (numbers | |
12 | or everything else), and returning these components in a tuple. Sorting | |
13 | tuples in Python is well-defined, and this fact is used to sort the input | |
14 | strings properly. But how does one break a string into sub-components? | |
15 | And what does one do to those components once they are split? Below I | |
16 | will explain the algorithm that was chosen for the :mod:`natsort` module, | |
17 | and some of the thinking that went into those design decisions. I will | |
18 | also mention some of the stumbling blocks I ran into because | |
19 | `getting sorting right is surprisingly hard`_. | |
20 | ||
21 | If you are impatient, you can skip to :ref:`tldr1` for the algorithm | |
22 | in the simplest case, and :ref:`tldr2` | |
23 | to see what extra code is needed to handle special cases. | |
24 | ||
25 | First, How Does Natural Sorting Work At a High Level? | |
26 | ----------------------------------------------------- | |
27 | ||
28 | If I want to compare '2 ft 7 in' to '2 ft 11 in', I might do the following | |
29 | ||
30 | .. code-block:: pycon | |
31 | ||
32 | >>> '2 ft 7 in' < '2 ft 11 in' | |
33 | False | |
34 | ||
35 | We as humans know that the above should be true, but why does Python think it | |
36 | is false? Here is how it is performing the comparison: | |
37 | ||
38 | :: | |
39 | ||
40 | '2' <=> '2' ==> equal, so keep going | |
41 | ' ' <=> ' ' ==> equal, so keep going | |
42 | 'f' <=> 'f' ==> equal, so keep going | |
43 | 't' <=> 't' ==> equal, so keep going | |
44 | ' ' <=> ' ' ==> equal, so keep going | |
45 | '7' <=> '1' ==> different, use result of '7' < '1' | |
46 | ||
47 | '7' evaluates as greater than '1' so the statement is false. When sorting, if | |
48 | a value is less than another it is placed first, so in our above example | |
49 | '2 ft 11 in' would end up before '2 ft 7 in', which is not correct. What to do? | |
50 | ||
51 | The best way to handle this is to break the string into sub-components | |
52 | of numbers and non-numbers, and then convert the numeric parts into | |
53 | :func:`float` or :func:`int` types. This will force Python to | |
54 | actually understand the context of what it is sorting and then "do the | |
55 | right thing." Luckily, it handles sorting lists of strings right | |
56 | out-of-the-box, so the only hard part is actually making this string-to-list | |
57 | transformation and then Python will handle the rest. | |
58 | ||
59 | :: | |
60 | ||
61 | '2 ft 7 in' ==> (2, ' ft ', 7, ' in') | |
62 | '2 ft 11 in' ==> (2, ' ft ', 11, ' in') | |
63 | ||
64 | When Python compares the two, it roughly follows the below logic: | |
65 | ||
66 | :: | |
67 | ||
68 | 2 <=> 2 ==> equal, so keep going | |
69 | ' ft ' <=> ' ft ' ==> a string is a special type of sequence - evaluate each character individually | |
70 | || | |
71 | --> | |
72 | ' ' <=> ' ' ==> equal, so keep going | |
73 | 'f' <=> 'f' ==> equal, so keep going | |
74 | 't' <=> 't' ==> equal, so keep going | |
75 | ' ' <=> ' ' ==> equal, so keep going | |
76 | <== Back to parent sequence | |
77 | 7 <=> 11 ==> different, use the result of 7 < 11 | |
78 | ||
79 | Clearly, seven is less than eleven, so our comparison is as we expect, and we | |
80 | would get the sorting order we wanted. | |
81 | ||
82 | At its heart, :mod:`natsort` is simply a tool to break strings into tuples, | |
83 | turning numbers in strings (i.e. ``'79'``) into *ints* and *floats* as it does this. | |
84 | ||
85 | Natsort's Approach | |
86 | ------------------ | |
87 | ||
88 | .. contents:: | |
89 | :local: | |
90 | ||
91 | Decomposing Strings Into Sub-Components | |
92 | +++++++++++++++++++++++++++++++++++++++ | |
93 | ||
94 | The first major hurdle to overcome is to decompose the string into | |
95 | sub-components. Remarkably, this turns out to be the easy part, owing mostly | |
96 | to Python's easy access to regular expressions. Breaking an arbitrary string | |
97 | based on a pattern is pretty straightforward. | |
98 | ||
99 | .. code-block:: pycon | |
100 | ||
101 | >>> import re | |
102 | >>> re.split(r'(\d+)', '2 ft 11 in') | |
103 | ['', '2', ' ft ', '11', ' in'] | |
104 | ||
105 | Clear (assuming you can read regular expressions) and concise. | |
106 | ||
107 | The reason I began developing :mod:`natsort` in the first place was because I | |
108 | needed to handle the natural sorting of strings containing *real numbers*, not | |
109 | just unsigned integers as the above example contains. By real numbers, I mean | |
110 | those like ``-45.4920E-23``. :mod:`natsort` can handle just about any number | |
111 | definition; to that end, here are all the regular expressions used in | |
112 | :mod:`natsort`: | |
113 | ||
114 | .. code-block:: pycon | |
115 | ||
116 | >>> unsigned_int = r'([0-9]+)' | |
117 | >>> signed_int = r'([-+]?[0-9]+)' | |
118 | >>> unsigned_float = r'((?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][-+]?[0-9]+)?)' | |
119 | >>> signed_float = r'([-+]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][-+]?[0-9]+)?)' | |
120 | >>> unsigned_float_no_exponent = r'((?:[0-9]+\.?[0-9]*|\.[0-9]+))' | |
121 | >>> signed_float_no_exponent = r'([-+]?(?:[0-9]+\.?[0-9]*|\.[0-9]+))' | |
122 | ||
123 | Note that ``"inf"`` and ``"nan"`` are deliberately omitted from the float | |
124 | definition because you wouldn't want (for example) ``"banana"`` to be converted | |
125 | into ``['ba', 'nan', 'a']``. Let's see an example: | |
126 | ||
127 | .. code-block:: pycon | |
128 | ||
129 | >>> re.split(signed_float, 'The mass of 3 electrons is 2.732815068E-30 kg') | |
130 | ['The mass of ', '3', ' electrons is ', '2.732815068E-30', ' kg'] | |
131 | ||
132 | .. note:: | |
133 | ||
134 | It is a bit of a lie to say the above are the complete regular expressions. In the | |
135 | actual code there is also handling for non-ASCII unicode characters (such as ⑦), | |
136 | but I will ignore that aspect of :mod:`natsort` in this discussion. | |
137 | ||
138 | Now, when the user wants to change the definition of a number, it is as easy as | |
139 | changing the pattern supplied to the regular expression engine. | |
140 | ||
141 | Choosing the right default is hard, though (well, in this case it shouldn't | |
142 | have been but I was rather thick-headed). In retrospect, it should have been | |
143 | obvious that since essentially all the code examples I had/have seen for | |
144 | natural sorting were for *unsigned integers*, I should have made the default | |
145 | definition of a number an *unsigned integer*. But, in the brash days of my | |
146 | youth I assumed that since my use case was real numbers, everyone else would | |
147 | be happier sorting by real numbers; so, I made the default definition of a | |
148 | number a *signed float with exponent*. `This astonished`_ `a lot`_ `of people`_ | |
149 | (`and some people aren't very nice when they are astonished`_). | |
150 | Starting with :mod:`natsort` version 4.0.0 the default number definition was | |
151 | changed to an *unsigned integer* which satisfies the "least astonishment" | |
152 | principle, and I have not heard a complaint since. | |
153 | ||
154 | Coercing Strings Containing Numbers Into Numbers | |
155 | ++++++++++++++++++++++++++++++++++++++++++++++++ | |
156 | ||
157 | There has been some debate on Stack Overflow as to what method is best to | |
158 | coerce a string to a number if it can be coerced, and leaving it alone otherwise | |
159 | (see `this one for coercion`_ and `this one for checking`_ for some high traffic questions), | |
160 | but it mostly boils down to two different solutions, shown here: | |
161 | ||
162 | .. code-block:: pycon | |
163 | ||
164 | >>> def coerce_try_except(x): | |
165 | ... try: | |
166 | ... return int(x) | |
167 | ... except ValueError: | |
168 | ... return x | |
169 | ... | |
170 | >>> def coerce_regex(x): | |
171 | ... # Note that precompiling the regex is more performant, | |
172 | ... # but I do not show that here for clarity's sake. | |
173 | ... return int(x) if re.match(r'[-+]?\d+$', x) else x | |
174 | ... | |
175 | ||
176 | Here are some timing results run on my machine: | |
177 | ||
178 | .. code-block:: pycon | |
179 | ||
180 | In [0]: numbers = list(map(str, range(100))) # A list of numbers as strings | |
181 | ||
182 | In [1]: not_numbers = ['banana' + x for x in numbers] | |
183 | ||
184 | In [2]: %timeit [coerce_try_except(x) for x in numbers] | |
185 | 10000 loops, best of 3: 51.1 µs per loop | |
186 | ||
187 | In [3]: %timeit [coerce_try_except(x) for x in not_numbers] | |
188 | 1000 loops, best of 3: 289 µs per loop | |
189 | ||
190 | In [4]: %timeit [coerce_regex(x) for x in not_numbers] | |
191 | 10000 loops, best of 3: 67.6 µs per loop | |
192 | ||
193 | In [5]: %timeit [coerce_regex(x) for x in numbers] | |
194 | 10000 loops, best of 3: 123 µs per loop | |
195 | ||
196 | What can we learn from this? The ``try: except`` method (arguably the most | |
197 | "pythonic" of the solutions) is best for numeric input, but performs over 5X | |
198 | slower for non-numeric input. Conversely, the regular expression method, though | |
199 | slower than ``try: except`` for both input types, is more efficient for | |
200 | non-numeric input than for input that can be converted to an ``int``. Further, | |
201 | even though the regular expression method is slower for both input types, it is | |
202 | always at least twice as fast as the worst case for the ``try: except``. | |
203 | ||
204 | Why do I care? Shouldn't I just pick a method and not worry about it? Probably. | |
205 | However, I am very conscious about the performance of :mod:`natsort`, and want | |
206 | it to be a true drop-in replacement for :func:`sorted` without having to incur | |
207 | a performance penalty. For the purposes of :mod:`natsort`, there is no clear | |
208 | winner between the two algorithms - the data being passed to this function will | |
209 | likely be a mix of numeric and non-numeric string content. Do I use the | |
210 | ``try: except`` method and hope the speed gains on numbers will offset the | |
211 | non-number performance, or do I use regular expressions and take the more | |
212 | stable performance? | |
213 | ||
214 | It turns out that within the context of :mod:`natsort`, some assumptions can be | |
215 | made that make a hybrid approach attractive. Because all strings are pre-split | |
216 | into numeric and non-numeric content *before* being passed to this coercion | |
217 | function, the assumption can be made that *if a string begins with a digit or a | |
218 | sign, it can be coerced into a number*. | |
219 | ||
220 | .. code-block:: pycon | |
221 | ||
222 | >>> def coerce_to_int(x): | |
223 | ... if x[0] in '0123456789+-': | |
224 | ... try: | |
225 | ... return int(x) | |
226 | ... except ValueError: | |
227 | ... return x | |
228 | ... else: | |
229 | ... return x | |
230 | ... | |
231 | ||
232 | So how does this perform compared to the standard coercion methods? | |
233 | ||
234 | .. code-block:: pycon | |
235 | ||
236 | In [6]: %timeit [coerce_to_int(x) for x in numbers] | |
237 | 10000 loops, best of 3: 71.6 µs per loop | |
238 | ||
239 | In [7]: %timeit [coerce_to_int(x) for x in not_numbers] | |
240 | 10000 loops, best of 3: 26.4 µs per loop | |
241 | ||
242 | The hybrid method eliminates most of the time wasted on numbers checking | |
243 | that it is in fact a number before passing to :func:`int`, and eliminates | |
244 | the time wasted in the exception stack for input that is not a number. | |
245 | ||
246 | That's as fast as we can get, right? In pure Python, probably. At least, it's | |
247 | close. But because I am crazy and a glutton for punishment, I decided to see | |
248 | if I could get any faster writing a C extension. It's called | |
249 | `fastnumbers`_ and contains a C implementation of the above coercion functions | |
250 | called :func:`fast_int`. How does it fare? Pretty well. | |
251 | ||
252 | .. code-block:: pycon | |
253 | ||
254 | In [8]: %timeit [fast_int(x) for x in numbers] | |
255 | 10000 loops, best of 3: 30.9 µs per loop | |
256 | ||
257 | In [9]: %timeit [fast_int(x) for x in not_numbers] | |
258 | 10000 loops, best of 3: 30 µs per loop | |
259 | ||
260 | During development of :mod:`natsort`, I wanted to ensure that using it did not | |
261 | get in the way of a user's program by introducing a performance penalty to | |
262 | their code. To that end, I do not feel like my adventures down the rabbit hole | |
263 | of optimization of coercion functions were a waste; I can confidently look users | |
264 | in the eye and say I considered every option in ensuring :mod:`natsort` is as | |
265 | efficient as possible. This is why if `fastnumbers`_ is installed it will be | |
266 | used for this step, and otherwise the hybrid method will be used. | |
267 | ||
268 | .. note:: | |
269 | ||
270 | Modifying the hybrid coercion function for floats is straightforward. | |
271 | ||
272 | .. code-block:: pycon | |
273 | ||
274 | >>> def coerce_to_float(x): | |
275 | ... if x[0] in '.0123456789+-' or x.lower().lstrip()[:3] in ('nan', 'inf'): | |
276 | ... try: | |
277 | ... return float(x) | |
278 | ... except ValueError: | |
279 | ... return x | |
280 | ... else: | |
281 | ... return x | |
282 | ... | |
283 | ||
284 | .. _tldr1: | |
285 | ||
286 | TL;DR 1 - The Simple "No Special Cases" Algorithm | |
287 | +++++++++++++++++++++++++++++++++++++++++++++++++ | |
288 | ||
289 | At this point, our :mod:`natsort` algorithm is essentially the following: | |
290 | ||
291 | .. code-block:: pycon | |
292 | ||
293 | >>> import re | |
294 | >>> def natsort_key(x, as_float=False, signed=False): | |
295 | ... if as_float: | |
296 | ... regex = signed_float if signed else unsigned_float | |
297 | ... else: | |
298 | ... regex = signed_int if signed else unsigned_int | |
299 | ... split_input = re.split(regex, x) | |
300 | ... split_input = filter(None, split_input) # removes null strings | |
301 | ... coerce = coerce_to_float if as_float else coerce_to_int | |
302 | ... return tuple(coerce(s) for s in split_input) | |
303 | ... | |
304 | ||
305 | I have written the above for clarity and not performance. | |
306 | This pretty much matches `most natural sort solutions for python on Stack Overflow`_ | |
307 | (except the above includes customization of the definition of a number). | |
8 | This page has been moved to the | |
9 | `natsort wiki <https://github.com/SethMMorton/natsort/wiki/How-Does-Natsort-Work%3F>`_. | |
308 | 10 | |
309 | 11 | Special Cases Everywhere! |
310 | 12 | ------------------------- |
311 | 13 | |
312 | .. contents:: | |
313 | :local: | |
314 | ||
315 | .. image:: special_cases_everywhere.jpg | |
316 | ||
317 | If what I described in :ref:`TL;DR 1 <tldr1>` were | |
318 | all that :mod:`natsort` needed to | |
319 | do then there probably wouldn't be much need for a third-party module, right? | |
320 | Probably. But it turns out that in real-world data there are a lot of | |
321 | special cases that need to be handled, and in true `80%/20%`_ fashion, the | |
322 | majority of the code in :mod:`natsort` is devoted to handling special cases | |
323 | like those described below. | |
324 | ||
325 | Sorting Filesystem Paths | |
326 | ++++++++++++++++++++++++ | |
327 | ||
328 | `The first major special case I encountered was sorting filesystem paths`_ | |
329 | (if you go to the link, you will see I didn't handle it well for a year... | |
330 | this was before I fully realized how much functionality I could really add | |
331 | to :mod:`natsort`). Let's apply the :func:`natsort_key` from above to some | |
332 | filesystem paths that you might see being auto-generated from your operating | |
333 | system: | |
334 | ||
335 | .. code-block:: pycon | |
336 | ||
337 | >>> paths = ['Folder (10)/file.tar.gz', | |
338 | ... 'Folder/file.tar.gz', | |
339 | ... 'Folder (1)/file (1).tar.gz', | |
340 | ... 'Folder (1)/file.tar.gz'] | |
341 | >>> sorted(paths, key=natsort_key) | |
342 | ['Folder (1)/file (1).tar.gz', 'Folder (1)/file.tar.gz', 'Folder (10)/file.tar.gz', 'Folder/file.tar.gz'] | |
343 | ||
344 | Well that's not right! What is ``'Folder/file.tar.gz'`` doing at the end? | |
345 | It has to do with the numerical ASCII code assigned to the space and | |
346 | ``/`` characters in the `ASCII table`_. According to the `ASCII table`_, the | |
347 | space character (number 32) comes before the ``/`` character (number 47). If | |
348 | we remove the common prefix in all of the above strings (``'Folder'``), we | |
349 | can see why this happens: | |
350 | ||
351 | .. code-block:: pycon | |
352 | ||
353 | >>> ' (1)/file.tar.gz' < '/file.tar.gz' | |
354 | True | |
355 | >>> ' ' < '/' | |
356 | True | |
357 | ||
358 | This isn't very convenient... how do we solve it? We can split the path | |
359 | across the path separators and then sort. A convenient way to do this is | |
360 | with the :data:`Path.parts <pathlib.PurePath.parts>` property from | |
361 | :mod:`pathlib`: | |
362 | ||
363 | .. code-block:: pycon | |
364 | ||
365 | >>> import pathlib | |
366 | >>> sorted(paths, key=lambda x: tuple(natsort_key(s) for s in pathlib.Path(x).parts)) | |
367 | ['Folder/file.tar.gz', 'Folder (1)/file (1).tar.gz', 'Folder (1)/file.tar.gz', 'Folder (10)/file.tar.gz'] | |
368 | ||
369 | Almost! It seems like there is some funny business going on in the final | |
370 | filename component as well. We can solve that nicely and quickly with | |
371 | :data:`Path.suffixes <pathlib.PurePath.suffixes>` and :data:`Path.stem | |
372 | <pathlib.PurePath.stem>`. | |
373 | ||
374 | .. code-block:: pycon | |
375 | ||
376 | >>> def decompose_path_into_components(x): | |
377 | ... path_split = list(pathlib.Path(x).parts) | |
378 | ... # Remove the final filename component from the path. | |
379 | ... final_component = pathlib.Path(path_split.pop()) | |
380 | ... # Split off all the extensions. | |
381 | ... suffixes = final_component.suffixes | |
382 | ... stem = final_component.name.replace(''.join(suffixes), '') | |
383 | ... # Remove the '.' prefix of each extension, and make that | |
384 | ... # final component a list of the stem and each suffix. | |
385 | ... final_component = [stem] + [x[1:] for x in suffixes] | |
386 | ... # Replace the split final filename component. | |
387 | ... path_split.extend(final_component) | |
388 | ... return path_split | |
389 | ... | |
390 | >>> def natsort_key_with_path_support(x): | |
391 | ... return tuple(natsort_key(s) for s in decompose_path_into_components(x)) | |
392 | ... | |
393 | >>> sorted(paths, key=natsort_key_with_path_support) | |
394 | ['Folder/file.tar.gz', 'Folder (1)/file.tar.gz', 'Folder (1)/file (1).tar.gz', 'Folder (10)/file.tar.gz'] | |
395 | ||
396 | This works because in addition to breaking the input by path separators, | |
397 | the final filename component is separated from its extensions as well. | |
398 | *Then*, each of these separated components is sent to the | |
399 | :mod:`natsort` algorithm, so the result is a tuple of tuples. Once that | |
400 | is done, we can see how comparisons can be done in the expected manner. | |
401 | ||
402 | .. code-block:: pycon | |
403 | ||
404 | >>> a = natsort_key_with_path_support('Folder (1)/file (1).tar.gz') | |
405 | >>> a | |
406 | (('Folder (', 1, ')'), ('file (', 1, ')'), ('tar',), ('gz',)) | |
407 | >>> | |
408 | >>> b = natsort_key_with_path_support('Folder/file.tar.gz') | |
409 | >>> b | |
410 | (('Folder',), ('file',), ('tar',), ('gz',)) | |
411 | >>> | |
412 | >>> a > b | |
413 | True | |
414 | ||
415 | .. note:: | |
416 | ||
417 | The actual :meth:`decompose_path_into_components`-equivalent function in | |
418 | :mod:`natsort` has a few more heuristics than shown here so that | |
419 | it is not over-zealous in what it defines as a path suffix, but this has | |
420 | been omitted in this how-to for clarity. | |
421 | ||
422 | Comparing Different Types | |
423 | +++++++++++++++++++++++++ | |
424 | ||
425 | `The second major special case I encountered was sorting of different types`_. | |
426 | On Python 2 (i.e. legacy Python), this mostly didn't matter *too* | |
427 | much since it uses an arbitrary heuristic to allow traditionally un-comparable | |
428 | types to be compared (such as comparing ``'a'`` to ``1``). However, on Python 3 | |
429 | (i.e. Python) it simply won't let you perform such nonsense, raising a | |
430 | :exc:`TypeError` instead. | |
431 | ||
432 | You can imagine that a module that breaks strings into tuples of numbers and | |
433 | strings is walking a dangerous line if it does not have special handling for | |
434 | comparing numbers and strings. My imagination was not so great at first. | |
435 | Let's take a look at all the ways this can fail with real-world data. | |
436 | ||
437 | .. code-block:: pycon | |
438 | ||
439 | >>> def natsort_key_with_poor_real_number_support(x): | |
440 | ... split_input = re.split(signed_float, x) | |
441 | ... split_input = filter(None, split_input) # removes null strings | |
442 | ... return tuple(coerce_to_float(s) for s in split_input) | |
443 | >>> | |
444 | >>> sorted([5, '4'], key=natsort_key_with_poor_real_number_support) | |
445 | Traceback (most recent call last): | |
446 | ... | |
447 | TypeError: ... | |
448 | >>> | |
449 | >>> sorted(['12 apples', 'apples'], key=natsort_key_with_poor_real_number_support) | |
450 | Traceback (most recent call last): | |
451 | ... | |
452 | TypeError: ... | |
453 | >>> | |
454 | >>> sorted(['version5.3.0', 'version5.3rc1'], key=natsort_key_with_poor_real_number_support) | |
455 | Traceback (most recent call last): | |
456 | ... | |
457 | TypeError: ... | |
458 | ||
459 | Let's break these down. | |
460 | ||
461 | #. The integer ``5`` is sent to ``re.split`` which expects only strings | |
462 | or bytes, which is a no-no. | |
463 | #. ``natsort_key_with_poor_real_number_support('12 apples') < natsort_key_with_poor_real_number_support('apples')`` | |
464 | is the same as ``(12.0, ' apples') < ('apples',)``, and thus a number gets | |
465 | compared to a string [#f1]_ which also is a no-no. | |
466 | #. This one scores big on the astonishment scale, especially if one | |
467 | accidentally uses signed integers or real numbers when they mean | |
468 | to use unsigned integers. | |
469 | ``natsort_key_with_poor_real_number_support('version5.3.0') < natsort_key_with_poor_real_number_support('version5.3rc1')`` | |
470 | is the same as ``('version', 5.3, 0.0) < ('version', 5.3, 'rc', 1.0)``, | |
471 | so in the third element a number gets compared to a string, once again | |
472 | the same old no-no. (The same would happen with ``'version5-3'`` and | |
473 | ``'version5-a'``, which would become ``('version', 5, -3)`` and | |
474 | ``('version', 5, '-a')``). | |
475 | ||
476 | As you might expect, the solution to the first issue is to wrap the | |
477 | ``re.split`` call in a ``try: except:`` block and handle the number specially | |
478 | if a :exc:`TypeError` is raised. The second and third cases *could* be handled | |
479 | in a "special case" manner, meaning only respond and do something different | |
480 | if these problems are detected. But a less error-prone method is to ensure | |
481 | that the data is correct-by-construction, and this can be done by ensuring | |
482 | that the returned tuples *always* start with a string, and then alternate | |
483 | in a string-number-string-number-string pattern; this can be achieved by | |
484 | adding an empty string wherever the pattern is not followed [#f2]_. This ends | |
485 | up working out pretty nicely because empty strings are always "less" than | |
486 | any non-empty string, and we typically want numbers to come before strings. | |
487 | ||
488 | Let's take a look at how this works out. | |
489 | ||
490 | .. code-block:: pycon | |
491 | ||
492 | >>> from natsort.utils import sep_inserter | |
493 | >>> list(sep_inserter(iter(['apples']), '')) | |
494 | ['apples'] | |
495 | >>> | |
496 | >>> list(sep_inserter(iter([12, ' apples']), '')) | |
497 | ['', 12, ' apples'] | |
498 | >>> | |
499 | >>> list(sep_inserter(iter(['version', 5, -3]), '')) | |
500 | ['version', 5, '', -3] | |
501 | >>> | |
502 | >>> from natsort import natsort_keygen, ns | |
503 | >>> natsort_key_with_good_real_number_support = natsort_keygen(alg=ns.REAL) | |
504 | >>> | |
505 | >>> sorted([5, '4'], key=natsort_key_with_good_real_number_support) | |
506 | ['4', 5] | |
507 | >>> | |
508 | >>> sorted(['12 apples', 'apples'], key=natsort_key_with_good_real_number_support) | |
509 | ['12 apples', 'apples'] | |
510 | >>> | |
511 | >>> sorted(['version5.3.0', 'version5.3rc1'], key=natsort_key_with_good_real_number_support) | |
512 | ['version5.3.0', 'version5.3rc1'] | |
513 | ||
514 | How the "good" version works will be given in | |
515 | `TL;DR 2 - Handling Crappy, Real-World Input`_. | |
516 | ||
517 | Handling NaN | |
518 | ++++++++++++ | |
519 | ||
520 | `A rather unexpected special case I encountered was sorting collections containing NaN`_. | |
521 | Let's see what happens when you try to sort a plain old list of numbers when there | |
522 | is a **NaN** floating around in there. | |
523 | ||
524 | .. code-block:: pycon | |
525 | ||
526 | >>> danger = [7, float('nan'), 22.7, 19, -14, 59.123, 4] | |
527 | >>> sorted(danger) | |
528 | [7, nan, -14, 4, 19, 22.7, 59.123] | |
529 | ||
530 | Clearly that isn't correct, and for once it isn't my fault! | |
531 | `It's hard to compare floating point numbers`_. By definition, **NaN** is unorderable | |
532 | to any other number, and is never equal to any other number, including itself. | |
533 | ||
534 | .. code-block:: pycon | |
535 | ||
536 | >>> nan = float('nan') | |
537 | >>> 5 > nan | |
538 | False | |
539 | >>> 5 < nan | |
540 | False | |
541 | >>> 5 == nan | |
542 | False | |
543 | >>> 5 != nan | |
544 | True | |
545 | >>> nan == nan | |
546 | False | |
547 | >>> nan != nan | |
548 | True | |
549 | ||
550 | The implication of all this for us is that if there is an **NaN** in the | |
551 | data-set we are trying to sort, the data-set will end up being sorted in | |
552 | two separate yet individually sorted sequences - the one *before* the **NaN**, | |
553 | and the one *after*. This is because the ``<`` operation that is used | |
554 | to sort always returns :const:`False` with **NaN**. | |
555 | ||
556 | Because :mod:`natsort` aims to sort sequences in a way that does not surprise | |
557 | the user, keeping this behavior is not acceptable (I don't require my users | |
558 | to know how **NaN** will behave in a sorting algorithm). The simplest way to | |
559 | satisfy the "least astonishment" principle is to substitute **NaN** with | |
560 | some other value. But what value is *least* astonishing? I chose to replace | |
561 | **NaN** with :math:`-\infty` so that these poorly behaved elements always | |
562 | end up at the front where the users will most likely be alerted to their | |
563 | presence. | |
564 | ||
565 | .. code-block:: pycon | |
566 | ||
567 | >>> def fix_nan(x): | |
568 | ... if x != x: # only true for NaN | |
569 | ... return float('-inf') | |
570 | ... else: | |
571 | ... return x | |
572 | ... | |
573 | ||
574 | Let's check out :ref:`TL;DR 2 <tldr2>` to see how this can be | |
575 | incorporated into the simple key function from :ref:`TL;DR 1 <tldr1>`. | |
576 | ||
577 | .. _tldr2: | |
578 | ||
579 | TL;DR 2 - Handling Crappy, Real-World Input | |
580 | +++++++++++++++++++++++++++++++++++++++++++ | |
581 | ||
582 | Let's see how our elegant key function from :ref:`TL;DR 1 <tldr1>` has | |
583 | become bastardized in order to support handling mixed real-world data | |
584 | and user customizations. | |
585 | ||
586 | .. code-block:: pycon | |
587 | ||
588 | >>> def natsort_key(x, as_float=False, signed=False, as_path=False): | |
589 | ... if as_float: | |
590 | ... regex = signed_float if signed else unsigned_float | |
591 | ... else: | |
592 | ... regex = signed_int if signed else unsigned_int | |
593 | ... try: | |
594 | ... if as_path: | |
595 | ... x = decompose_path_into_components(x) # Decomposes into list of strings | |
596 | ... # If this raises a TypeError, input is not a string. | |
597 | ... split_input = re.split(regex, x) | |
598 | ... except TypeError: | |
599 | ... try: | |
600 | ... # Does this need to be applied recursively (list-of-list)? | |
601 | ... return tuple(map(natsort_key, x)) | |
602 | ... except TypeError: | |
603 | ... # Must be a number | |
604 | ... ret = ('', fix_nan(x)) # Maintain string-number-string pattern | |
605 | ... return (ret,) if as_path else ret # as_path returns tuple-of-tuples | |
606 | ... else: | |
607 | ... split_input = filter(None, split_input) # removes null strings | |
608 | ... # Note that the coerce_to_int/coerce_to_float functions | |
609 | ... # are also modified to use the fix_nan function. | |
610 | ... if as_float: | |
611 | ... coerced_input = (coerce_to_float(s) for s in split_input) | |
612 | ... else: | |
613 | ... coerced_input = (coerce_to_int(s) for s in split_input) | |
614 | ... return tuple(sep_inserter(coerced_input, '')) | |
615 | ... | |
616 | ||
617 | And this doesn't even show handling :class:`bytes` type! Notice that we have | |
618 | to do non-obvious things like modify the return form of numbers when ``as_path`` | |
619 | is given, just to avoid comparing strings and numbers for the case in which a | |
620 | user provides input like ``['/home/me', 42]``. | |
621 | ||
622 | Let's take it out for a spin! | |
623 | ||
624 | .. code-block:: pycon | |
625 | ||
626 | >>> danger = [7, float('nan'), 22.7, '19', '-14', '59.123', 4] | |
627 | >>> sorted(danger, key=lambda x: natsort_key(x, as_float=True, signed=True)) | |
628 | [nan, '-14', 4, 7, '19', 22.7, '59.123'] | |
629 | >>> | |
630 | >>> paths = ['Folder (1)/file.tar.gz', | |
631 | ... 'Folder/file.tar.gz', | |
632 | ... 123456] | |
633 | >>> sorted(paths, key=lambda x: natsort_key(x, as_path=True)) | |
634 | [123456, 'Folder/file.tar.gz', 'Folder (1)/file.tar.gz'] | |
635 | ||
636 | Here Be Dragons: Adding Locale Support | |
637 | -------------------------------------- | |
638 | ||
639 | .. contents:: | |
640 | :local: | |
641 | ||
642 | Probably the most challenging special case I had to handle was getting | |
643 | :mod:`natsort` to handle sorting the non-numerical parts of input | |
644 | correctly, and also allowing it to sort the numerical bits in different | |
645 | locales. This was in no way what I originally set out to do with this | |
646 | library, so I was | |
647 | `caught a bit off guard when the request was initially made`_. | |
648 | I discovered the :mod:`locale` library, and assumed that if it's part of | |
649 | Python's StdLib there can't be too many dragons, right? | |
650 | ||
651 | .. admonition:: INCOMPLETE LIST OF DRAGONS | |
652 | ||
653 | - https://github.com/SethMMorton/natsort/issues/21 | |
654 | - https://github.com/SethMMorton/natsort/issues/22 | |
655 | - https://github.com/SethMMorton/natsort/issues/23 | |
656 | - https://github.com/SethMMorton/natsort/issues/36 | |
657 | - https://github.com/SethMMorton/natsort/issues/44 | |
658 | - https://bugs.python.org/issue2481 | |
659 | - https://bugs.python.org/issue23195 | |
660 | - https://stackoverflow.com/questions/3412933/python-not-sorting-unicode-properly-strcoll-doesnt-help | |
661 | - https://stackoverflow.com/questions/22203550/sort-dictionary-by-key-using-locale-collation | |
662 | - https://stackoverflow.com/questions/33459384/unicode-character-not-in-range-when-calling-locale-strxfrm | |
663 | - https://stackoverflow.com/questions/36431810/sort-numeric-lines-with-thousand-separators | |
664 | - https://stackoverflow.com/questions/45734562/how-can-i-get-a-reasonable-string-sorting-with-python | |
665 | ||
666 | These can be summed up as follows: | |
667 | ||
668 | #. :mod:`locale` is a thin wrapper over your operating system's *locale* | |
669 | library, so if *that* is broken (like it is on BSD and OSX) then | |
670 | :mod:`locale` is broken in Python. | |
671 | #. Because of a bug in legacy Python (i.e. Python 2), there was no uniform | |
672 | way to use the :mod:`locale` sorting functionality between legacy Python | |
673 | and Python (luckily this is no longer an issue now that Python 2 is EOL). | |
674 | #. People have differing opinions of how capitalization should affect word | |
675 | order. | |
676 | #. There is no built-in way to handle locale-dependent thousands separators | |
677 | and decimal points *robustly*. | |
678 | #. Proper handling of Unicode is complicated. | |
679 | #. Proper handling of :mod:`locale` is complicated. | |
680 | ||
681 | Easily over half of the code in :mod:`natsort` is in some way dealing with some | |
682 | aspect of :mod:`locale` or basic case handling. It would have been impossible | |
683 | to get right without a `really good`_ `testing strategy`_. | |
684 | ||
685 | Don't expect any more TL;DR's... if you want to see how all this is fully | |
686 | incorporated into the :mod:`natsort` algorithm then please take a look | |
687 | `at the code`_. However, I will hint at how specific steps are taken in | |
688 | each section. | |
689 | ||
690 | Let's see how we can handle some of the dragons, one-by-one. | |
691 | ||
692 | Basic Case Control Support | |
693 | ++++++++++++++++++++++++++ | |
694 | ||
695 | Without even thinking about the mess that is adding :mod:`locale` support, | |
696 | :mod:`natsort` can introduce support for controlling how case is interpreted. | |
697 | ||
698 | First, let's take a look at how it is sorted by default (due to | |
699 | where characters lie on the `ASCII table`_). | |
700 | ||
701 | .. code-block:: pycon | |
702 | ||
703 | >>> a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
704 | >>> sorted(a) | |
705 | ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn'] | |
706 | ||
707 | All uppercase letters come before lowercase letters in the `ASCII table`_, | |
708 | so all capitalized words appear first. Not everyone agrees that this | |
709 | is the correct order. Some believe that the capitalized words should | |
710 | be last (``['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn']``). | |
711 | Some believe that both the lowercase and uppercase versions | |
712 | should appear together | |
713 | (``['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn']``). | |
714 | Some believe that both should be true ☹. Some people don't care at all [#f3]_. | |
715 | ||
716 | Solving the first case (I call it *LOWERCASEFIRST*) is actually pretty | |
717 | easy... just call the :meth:`str.swapcase` method on the input. | |
718 | ||
719 | .. code-block:: pycon | |
720 | ||
721 | >>> sorted(a, key=lambda x: x.swapcase()) | |
722 | ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn'] | |
723 | ||
724 | The last (I call it *IGNORECASE*) is pretty easy. | |
725 | Simply call :meth:`str.casefold` on the input (it's like :meth:`str.lower` | |
726 | but does a better job on non-latin character sets). | |
727 | ||
728 | .. code-block:: pycon | |
729 | ||
730 | >>> sorted(a, key=lambda x: x.casefold()) | |
731 | ['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn'] | |
732 | ||
733 | The middle case (I call it *GROUPLETTERS*) is less straightforward. | |
734 | The most efficient way to handle this is to duplicate each character | |
735 | with its lowercase version and then the original character. | |
736 | ||
737 | .. code-block:: pycon | |
738 | ||
739 | >>> import itertools | |
740 | >>> def groupletters(x): | |
741 | ... return ''.join(itertools.chain.from_iterable((y.casefold(), y) for y in x)) | |
742 | ... | |
743 | >>> groupletters('Apple') | |
744 | 'aAppppllee' | |
745 | >>> groupletters('apple') | |
746 | 'aappppllee' | |
747 | >>> sorted(a, key=groupletters) | |
748 | ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn'] | |
749 | ||
750 | The effect of this is that both ``'Apple'`` and ``'apple'`` are | |
751 | placed adjacent to each other because their transformations both begin | |
752 | with ``'a'``, and then the second character can be used to order them | |
753 | appropriately with respect to each other. | |
754 | ||
755 | There's a problem with this, though. Within the context of :mod:`natsort` | |
756 | we are trying to correctly sort numbers and those should be left alone. | |
757 | ||
758 | .. code-block:: pycon | |
759 | ||
760 | >>> a = ['Apple5', 'apple', 'Apple4E10', 'Banana'] | |
761 | >>> sorted(a, key=lambda x: natsort_key(x, as_float=True)) | |
762 | ['Apple5', 'Apple4E10', 'Banana', 'apple'] | |
763 | >>> sorted(a, key=lambda x: natsort_key(groupletters(x), as_float=True)) | |
764 | ['Apple4E10', 'Apple5', 'apple', 'Banana'] | |
765 | >>> groupletters('Apple4E10') | |
766 | 'aAppppllee44eE1100' | |
767 | ||
768 | We messed up the numbers! Looks like :func:`groupletters` needs to be applied | |
769 | *after* the strings are broken into their components. I'm not going to show | |
770 | how this is done here, but basically it requires applying the function in | |
771 | the ``else:`` block of :func:`coerce_to_int`/:func:`coerce_to_float`. | |
772 | ||
773 | .. code-block:: pycon | |
774 | ||
775 | >>> better_groupletters = natsort_keygen(alg=ns.GROUPLETTERS | ns.REAL) | |
776 | >>> better_groupletters('Apple4E10') | |
777 | ('aAppppllee', 40000000000.0) | |
778 | >>> sorted(a, key=better_groupletters) | |
779 | ['Apple5', 'Apple4E10', 'apple', 'Banana'] | |
780 | ||
781 | Of course, applying both *LOWERCASEFIRST* and *GROUPLETTERS* is just | |
782 | a matter of turning on both functions. | |
783 | ||
784 | Basic Unicode Support | |
785 | +++++++++++++++++++++ | |
786 | ||
787 | Unicode is hard and complicated. Here's an example. | |
788 | ||
789 | .. code-block:: pycon | |
790 | ||
791 | >>> b = [b'\x66', b'\x65', b'\xc3\xa9', b'\x65\xcc\x81', b'\x61', b'\x7a'] | |
792 | >>> a = [x.decode('utf8') for x in b] | |
793 | >>> a # doctest: +SKIP | |
794 | ['f', 'e', 'é', 'é', 'a', 'z'] | |
795 | >>> sorted(a) # doctest: +SKIP | |
796 | ['a', 'e', 'é', 'f', 'z', 'é'] | |
797 | ||
798 | There is more than one way to represent the character 'é' in Unicode. | |
799 | In fact, many characters have multiple representations. This is a challenge | |
800 | because comparing the two representations would return ``False`` even though | |
801 | they *look* the same. | |
802 | ||
803 | .. code-block:: pycon | |
804 | ||
805 | >>> a[2] == a[3] | |
806 | False | |
807 | ||
808 | Alas, since characters are compared based on the numerical value of their | |
809 | representation, sorting Unicode often gives unexpected results (like seeing | |
810 | 'é' come both *before* and *after* 'z'). | |
811 | ||
812 | The original approach that :mod:`natsort` took with respect to non-ASCII | |
813 | Unicode characters was to say "just use | |
814 | the :mod:`locale` or :mod:`PyICU` library" and then cross its fingers | |
815 | and hope those libraries take care of it. As you will find in the following | |
816 | sections, that comes with its own baggage, and turned out to not always work | |
817 | anyway (see https://stackoverflow.com/q/45734562/1399279). A more robust | |
818 | approach is to handle the Unicode out-of-the-box without invoking a | |
819 | heavy-handed library like :mod:`locale` or :mod:`PyICU`. | |
820 | To do this, we must use *normalization*. | |
821 | ||
822 | To fully understand Unicode normalization, | |
823 | `check out some official Unicode documentation`_. | |
824 | Just kidding... that's too much text. The following StackOverflow answers do | |
825 | a good job at explaining Unicode normalization in simple terms: | |
826 | https://stackoverflow.com/a/7934397/1399279 and | |
827 | https://stackoverflow.com/a/7931547/1399279. Put simply, normalization | |
828 | ensures that Unicode characters with multiple representations are in | |
829 | some canonical and consistent representation so that (for example) comparisons | |
830 | of the characters can be performed in a sane way. The following discussion | |
831 | assumes you at least read the StackOverflow answers. | |
832 | ||
833 | Looking back at our 'é' example, we can see that the two versions were | |
834 | constructed with the byte strings ``b'\xc3\xa9'`` and ``b'\x65\xcc\x81'``. | |
835 | The former representation is actually | |
836 | `LATIN SMALL LETTER E WITH ACUTE <https://www.fileformat.info/info/unicode/char/e9/index.htm>`_ | |
837 | and is a single character in the Unicode standard. This is known as the | |
838 | *compressed form* and corresponds to the 'NFC' normalization scheme. | |
839 | The latter representation is actually the letter 'e' followed by | |
840 | `COMBINING ACUTE ACCENT <https://www.fileformat.info/info/unicode/char/0301/index.htm>`_ | |
841 | and so is two characters in the Unicode standard. This is known as the | |
842 | *decompressed form* and corresponds to the 'NFD' normalization scheme. | |
843 | Since the first character in the decompressed form is actually the letter 'e', | |
844 | when compared to other ASCII characters it fits where you might expect. | |
845 | Unfortunately, all Unicode compressed form characters come after the | |
846 | ASCII characters and so they always will be placed after 'z' when sorting. | |
847 | ||
848 | It seems that most Unicode data is stored and shared in the compressed form | |
849 | which makes it challenging to sort. This can be solved by normalizing all | |
850 | incoming Unicode data to the decompressed form ('NFD') and *then* sorting. | |
851 | ||
852 | .. code-block:: pycon | |
853 | ||
854 | >>> import unicodedata | |
855 | >>> c = [unicodedata.normalize('NFD', x) for x in a] | |
856 | >>> c # doctest: +SKIP | |
857 | ['f', 'e', 'é', 'é', 'a', 'z'] | |
858 | >>> sorted(c) # doctest: +SKIP | |
859 | ['a', 'e', 'é', 'é', 'f', 'z'] | |
860 | ||
861 | Huzzah! Sane sorting without having to resort to :mod:`locale`! | |
862 | ||
863 | Using Locale to Compare Strings | |
864 | +++++++++++++++++++++++++++++++ | |
865 | ||
866 | The :mod:`locale` module is actually pretty cool, and provides lowly | |
867 | spare-time programmers like myself a way to handle the daunting task | |
868 | of proper locale-dependent support of their libraries and utilities. | |
869 | Having said that, it can be a bit of a bear to get right, | |
870 | `although they do point out in the documentation that it will be painful to use`_. | |
871 | Aside from the caveats spelled out in that link, it turns out that just | |
872 | comparing strings with :mod:`locale` in a cross-platform and | |
873 | cross-python-version manner is not as straightforward as one might hope. | |
874 | ||
875 | First, how to use :mod:`locale` to compare strings? It's actually | |
876 | pretty straightforward. Simply run the input through the :mod:`locale` | |
877 | transformation function :func:`locale.strxfrm`. | |
878 | ||
879 | .. code-block:: pycon | |
880 | ||
881 | >>> import locale, sys | |
882 | >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') | |
883 | 'en_US.UTF-8' | |
884 | >>> a = ['a', 'b', 'ä'] | |
885 | >>> sorted(a) | |
886 | ['a', 'b', 'ä'] | |
887 | >>> # The below fails on OSX, so don't run doctest on darwin. | |
888 | >>> is_osx = sys.platform == 'darwin' | |
889 | >>> sorted(a, key=locale.strxfrm) if not is_osx else ['a', 'ä', 'b'] | |
890 | ['a', 'ä', 'b'] | |
891 | >>> | |
892 | >>> a = ['apple', 'Banana', 'banana', 'Apple'] | |
893 | >>> sorted(a, key=locale.strxfrm) if not is_osx else ['apple', 'Apple', 'banana', 'Banana'] | |
894 | ['apple', 'Apple', 'banana', 'Banana'] | |
895 | ||
896 | It turns out that locale-aware sorting groups letters in the same | |
897 | way as turning on *GROUPLETTERS* and *LOWERCASEFIRST*. | |
898 | The trick is that you have to apply :func:`locale.strxfrm` only to non-numeric | |
899 | characters; otherwise, numbers won't be parsed properly. Therefore, it must | |
900 | be applied as part of the :func:`coerce_to_int`/:func:`coerce_to_float` | |
901 | functions in a manner similar to :func:`groupletters`. | |
902 | ||
903 | Unicode Support With Locale | |
904 | +++++++++++++++++++++++++++ | |
905 | ||
906 | Remember how in the `Basic Unicode Support`_ section I mentioned that we | |
907 | use the "decompressed" Unicode normalization form (e.g. NFD) on all inputs | |
908 | to ensure the order is as expected? | |
909 | ||
910 | If you have been following along so far, you probably expect that it is not | |
911 | that easy. You would be correct. | |
912 | ||
913 | It turns out that some locales (but not all) expect the input to be in | |
914 | "compressed form" (e.g. NFC) or the ordering is not as you might expect. | |
915 | `Check out this issue for a real-world example`_. Here's a relevant | |
916 | snippet of code | |
917 | ||
918 | .. code-block:: pycon | |
919 | ||
920 | In [1]: import locale, unicodedata | |
921 | ||
922 | In [2]: a = ['Aš', 'Cheb', 'Česko', 'Cibulov', 'Znojmo', 'Žilina'] | |
923 | ||
924 | In [3]: locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') | |
925 | Out[3]: 'en_US.UTF-8' | |
926 | ||
927 | In [4]: sorted(a, key=locale.strxfrm) | |
928 | Out[4]: ['Aš', 'Česko', 'Cheb', 'Cibulov', 'Žilina', 'Znojmo'] | |
929 | ||
930 | In [5]: sorted(a, key=lambda x: locale.strxfrm(unicodedata.normalize("NFD", x))) | |
931 | Out[5]: ['Aš', 'Česko', 'Cheb', 'Cibulov', 'Žilina', 'Znojmo'] | |
932 | ||
933 | In [6]: sorted(a, key=lambda x: locale.strxfrm(unicodedata.normalize("NFC", x))) | |
934 | Out[6]: ['Aš', 'Česko', 'Cheb', 'Cibulov', 'Žilina', 'Znojmo'] | |
935 | ||
936 | In [7]: locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8') | |
937 | Out[7]: 'de_DE.UTF-8' | |
938 | ||
939 | In [8]: sorted(a, key=locale.strxfrm) | |
940 | Out[8]: ['Aš', 'Česko', 'Cheb', 'Cibulov', 'Žilina', 'Znojmo'] | |
941 | ||
942 | In [9]: sorted(a, key=lambda x: locale.strxfrm(unicodedata.normalize("NFD", x))) | |
943 | Out[9]: ['Aš', 'Česko', 'Cheb', 'Cibulov', 'Žilina', 'Znojmo'] | |
944 | ||
945 | In [10]: sorted(a, key=lambda x: locale.strxfrm(unicodedata.normalize("NFC", x))) | |
946 | Out[10]: ['Aš', 'Česko', 'Cheb', 'Cibulov', 'Žilina', 'Znojmo'] | |
947 | ||
948 | In [11]: locale.setlocale(locale.LC_ALL, 'cs_CZ.UTF-8') | |
949 | Out[11]: 'cs_CZ.UTF-8' | |
950 | ||
951 | In [12]: sorted(a, key=locale.strxfrm) | |
952 | Out[12]: ['Aš', 'Cibulov', 'Česko', 'Cheb', 'Znojmo', 'Žilina'] | |
953 | ||
954 | In [13]: sorted(a, key=lambda x: locale.strxfrm(unicodedata.normalize("NFD", x))) | |
955 | Out[13]: ['Aš', 'Česko', 'Cibulov', 'Cheb', 'Žilina', 'Znojmo'] | |
956 | ||
957 | In [14]: sorted(a, key=lambda x: locale.strxfrm(unicodedata.normalize("NFC", x))) | |
958 | Out[14]: ['Aš', 'Cibulov', 'Česko', 'Cheb', 'Znojmo', 'Žilina'] | |
959 | ||
960 | Two out of three locales sort the same data in the same order no matter how the unicode | |
961 | input was normalized, but Czech seems to care how the input is formatted! | |
962 | ||
963 | So, everything mentioned in `Basic Unicode Support`_ is conditional on whether | |
964 | or not the user wants to use the :mod:`locale` library. If not, then | |
965 | "NFD" normalization is used. If they do, "NFC" normalization is used. | |
966 | ||
967 | Handling Broken Locale On OSX | |
968 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
969 | ||
970 | But what if the underlying *locale* implementation that :mod:`locale` | |
971 | relies upon is simply broken? It turns out that the *locale* library on | |
972 | OSX (and other BSD systems) is broken (and for some reason has never been | |
973 | fixed?), and so :mod:`locale` does not work as expected. | |
974 | ||
975 | How do I define "doesn't work as expected"? | |
976 | ||
977 | .. code-block:: pycon | |
978 | ||
979 | >>> a = ['apple', 'Banana', 'banana', 'Apple'] | |
980 | >>> sorted(a) | |
981 | ['Apple', 'Banana', 'apple', 'banana'] | |
982 | >>> | |
983 | >>> sorted(a, key=locale.strxfrm) if is_osx else sorted(a) | |
984 | ['Apple', 'Banana', 'apple', 'banana'] | |
985 | ||
986 | IT'S SORTING AS IF :func:`locale.strxfrm` WAS NEVER USED!! (and it's worse | |
987 | once non-ASCII characters get thrown into the mix.) I'm really not | |
988 | sure why this is considered OK for the OSX/BSD maintainers to not fix, | |
989 | but it's more than frustrating for poor developers who have been dragged | |
990 | into the *locale* game kicking and screaming. *<deep breath>*. | |
991 | ||
992 | So, how to deal with this situation? There are two ways to do so. | |
993 | ||
994 | #. Detect if :mod:`locale` is sorting incorrectly (i.e. ``dumb``) by seeing | |
995 | if ``'A'`` is sorted before ``'a'`` (incorrect) or not. | |
996 | ||
997 | .. code-block:: pycon | |
998 | ||
999 | >>> # This is genuinely the name of this function. | |
1000 | >>> # See natsort.compat.locale.py | |
1001 | >>> def dumb_sort(): | |
1002 | ... return locale.strxfrm('A') < locale.strxfrm('a') | |
1003 | ... | |
1004 | ||
1005 | If a ``dumb`` *locale* implementation is found, then automatically | |
1006 | turn on *LOWERCASEFIRST* and *GROUPLETTERS*. | |
1007 | #. Use an alternate library if installed. `ICU <http://site.icu-project.org/>`_ | |
1008 | is a great and powerful library that has a pretty decent Python port | |
1009 | called (you guessed it) `PyICU <https://pypi.org/project/PyICU/>`_. | |
1010 | If a user has this library installed on their computer, :mod:`natsort` | |
1011 | chooses to use that instead of :mod:`locale`. With a little bit of | |
1012 | planning, one can write a set of wrapper functions that call | |
1013 | the correct library under the hood such that the business logic never | |
1014 | has to know what library is being used (see `natsort.compat.locale.py`_). | |
1015 | ||
1016 | Let me tell you, this little complication really makes a challenge of testing | |
1017 | the code, since one must set up different environments on different operating | |
1018 | systems in order to test all possible code paths. Not to mention that | |
1019 | certain checks *will* fail for certain operating systems and environments | |
1020 | so one must be diligent in either writing the tests not to fail, or ignoring | |
1021 | those tests when on offending environments. | |
1022 | ||
1023 | Handling Locale-Aware Numbers | |
1024 | +++++++++++++++++++++++++++++ | |
1025 | ||
1026 | `Thousands separator support`_ is a problem that I knew would someday be | |
1027 | requested but had decided to push off until a rainy day. One day it finally | |
1028 | rained, and I decided to tackle the problem. | |
1029 | ||
1030 | So what is the problem? Consider the number ``1,234,567`` (assuming the | |
1031 | ``','`` is the thousands separator). Try to run that through :func:`int` | |
1032 | and you will get a :exc:`ValueError`. To handle this properly the thousands | |
1033 | separators must be removed. | |
1034 | ||
1035 | .. code-block:: pycon | |
1036 | ||
1037 | >>> float('1,234,567'.replace(',', '')) | |
1038 | 1234567.0 | |
1039 | ||
1040 | What if, in our current locale, the thousands separator is ``'.'`` and | |
1041 | the ``','`` is the decimal separator (like for the German locale *de_DE*)? | |
1042 | ||
1043 | .. code-block:: pycon | |
1044 | ||
1045 | >>> float('1.234.567'.replace('.', '').replace(',', '.')) | |
1046 | 1234567.0 | |
1047 | >>> float('1.234.567,89'.replace('.', '').replace(',', '.')) | |
1048 | 1234567.89 | |
1049 | ||
1050 | This is pretty much what :func:`locale.atoi` and :func:`locale.atof` do | |
1051 | under the hood. So what's the problem? Why doesn't :mod:`natsort` just | |
1052 | use this method under its hood? | |
1053 | Well, let's take a look at what would happen if we send some possible | |
1054 | :mod:`natsort` input through the above function: | |
1055 | ||
1056 | .. code-block:: pycon | |
1057 | ||
1058 | >>> natsort_key('1,234 apples, please.'.replace(',', '')) | |
1059 | ('', 1234, ' apples please.') | |
1060 | >>> natsort_key('Sir, €1.234,50 please.'.replace('.', '').replace(',', '.'), as_float=True) | |
1061 | ('Sir. €', 1234.5, ' please') | |
1062 | ||
1063 | Any character matching the thousands separator was dropped, and anything | |
1064 | matching the decimal separator was changed to ``'.'``! If these characters | |
1065 | were critical to how your data was ordered, this would break :mod:`natsort`. | |
1066 | ||
1067 | The first solution one might consider would be to first decompose the | |
1068 | input into sub-components (like we did for the *GROUPLETTERS* method | |
1069 | above) and then only apply these transformations on the number components. | |
1070 | This is a chicken-and-egg problem, though, because *we cannot appropriately | |
1071 | separate out the numbers because of the thousands separators and | |
1072 | non-'.' decimal separators* (well, at least not without making multiple | |
1073 | passes over the data which I do not consider to be a valid option). | |
1074 | ||
1075 | Regular expressions to the rescue! With regular expressions, we can | |
1076 | remove the thousands separators and change the decimal separator only | |
1077 | when they are actually within a number. Once the input has been | |
1078 | pre-processed with this regular expression, all the infrastructure | |
1079 | shown previously will work. | |
1080 | ||
1081 | Beware, these regular expressions will make your eyes bleed. | |
1082 | ||
1083 | .. code-block:: pycon | |
1084 | ||
1085 | >>> decimal = ',' # Assume German locale, so decimal separator is ',' | |
1086 | >>> # Look-behind assertions cannot accept range modifiers, so instead of i.e. | |
1087 | >>> # (?<!\.[0-9]{1,3}) I have to repeat the look-behind for 1, 2, and 3. | |
1088 | >>> nodecimal = r'(?<!{dec}[0-9])(?<!{dec}[0-9]{{2}})(?<!{dec}[0-9]{{3}})'.format(dec=decimal) | |
1089 | >>> strip_thousands = r''' | |
1090 | ... (?<=[0-9]{{1}}) # At least 1 number | |
1091 | ... (?<![0-9]{{4}}) # No more than 3 numbers | |
1092 | ... {nodecimal} # Cannot follow decimal | |
1093 | ... {thou} # The thousands separator | |
1094 | ... (?=[0-9]{{3}} # Three numbers must follow | |
1095 | ... ([^0-9]|$) # But a non-number after that | |
1096 | ... ) | |
1097 | ... '''.format(nodecimal=nodecimal, thou=re.escape('.')) # Thousands separator is '.' in German locale. | |
1098 | ... | |
1099 | >>> re.sub(strip_thousands, '', 'Sir, €1.234,50 please.', flags=re.X) | |
1100 | 'Sir, €1234,50 please.' | |
1101 | >>> | |
1102 | >>> # The decimal point must be preceded by a number or followed by | |
1103 | >>> # a number. This option only needs to be performed in the | |
1104 | >>> # case when the decimal separator for the locale is not '.'. | |
1105 | >>> switch_decimal = r'(?<=[0-9]){decimal}|{decimal}(?=[0-9])' | |
1106 | >>> switch_decimal = switch_decimal.format(decimal=decimal) | |
1107 | >>> re.sub(switch_decimal, '.', 'Sir, €1234,50 please.', flags=re.X) | |
1108 | 'Sir, €1234.50 please.' | |
1109 | >>> | |
1110 | >>> natsort_key('Sir, €1234.50 please.', as_float=True) | |
1111 | ('Sir, €', 1234.5, ' please.') | |
1112 | ||
1113 | Final Thoughts | |
1114 | -------------- | |
1115 | ||
1116 | My hope is that users of :mod:`natsort` never have to think about or worry | |
1117 | about all the bookkeeping or any of the details described above, and that using | |
1118 | :mod:`natsort` seems to magically "just work". For those of you who | |
1119 | took the time to read this engineering description, I hope it has enlightened | |
1120 | you to some of the issues that can be encountered when code is released | |
1121 | into the wild and has to accept "real-world data", or to what happens | |
1122 | to developers who naïvely make bold assumptions that are counter to | |
1123 | what the rest of the world assumes. | |
1124 | ||
1125 | .. rubric:: Footnotes | |
1126 | ||
1127 | .. [#f1] | |
1128 | *"But if you hadn't removed the leading empty string from re.split this | |
1129 | wouldn't have happened!!"* I can hear you saying. Well, that's true. I don't | |
1130 | have a *great* reason for having done that except that in an earlier | |
1131 | non-optimal incarnation of the algorithm I needed to do it, and it kind of | |
1132 | stuck, and it made other parts of the code easier if the assumption that | |
1133 | there were no empty strings was valid. | |
1134 | .. [#f2] | |
1135 | I'm not going to show how this is implemented in this document, | |
1136 | but if you are interested you can look at the code to | |
1137 | :func:`sep_inserter` in `util.py`_. | |
1138 | .. [#f3] | |
1139 | Handling each of these is straightforward, but coupled with the rapidly | |
1140 | fracturing execution paths presented in :ref:`TL;DR 2 <tldr2>` one can | |
1141 | imagine this will get out of hand quickly. If you take a look at | |
1142 | `natsort.py`_ and `util.py`_ you can observe that to avoid this I take | |
1143 | a more functional approach to constructing the :mod:`natsort` algorithm | |
1144 | as opposed to the procedural approach illustrated in | |
1145 | :ref:`TL;DR 1 <tldr1>` and :ref:`TL;DR 2 <tldr2>`. | |
1146 | ||
1147 | .. _ASCII table: https://www.asciitable.com/ | |
1148 | .. _getting sorting right is surprisingly hard: http://www.compciv.org/guides/python/fundamentals/sorting-collections-with-sorted/ | |
1149 | .. _This astonished: https://github.com/SethMMorton/natsort/issues/19 | |
1150 | .. _a lot: https://stackoverflow.com/questions/29548742/python-natsort-sort-strings-recursively | |
1151 | .. _of people: https://stackoverflow.com/questions/24045348/sort-set-of-numbers-in-the-form-xx-yy-in-python | |
1152 | .. _and some people aren't very nice when they are astonished: | |
1153 | https://github.com/xolox/python-naturalsort/blob/ed3e6b6ffaca3bdea3b76e08acbb8bd2a5fee463/README.rst#why-another-natsort-module | |
1154 | .. _fastnumbers: https://github.com/SethMMorton/fastnumbers | |
1155 | .. _as part of my testing: https://github.com/SethMMorton/natsort/blob/master/test_natsort/slow_splitters.py | |
1156 | .. _this one for coercion: https://stackoverflow.com/questions/736043/checking-if-a-string-can-be-converted-to-float-in-python | |
1157 | .. _this one for checking: https://stackoverflow.com/questions/354038/how-do-i-check-if-a-string-is-a-number-float | |
1158 | .. _most natural sort solutions for python on Stack Overflow: https://stackoverflow.com/q/4836710/1399279 | |
1159 | .. _80%/20%: https://en.wikipedia.org/wiki/Pareto_principle | |
1160 | .. _The first major special case I encountered was sorting filesystem paths: https://github.com/SethMMorton/natsort/issues/3 | |
1161 | .. _The second major special case I encountered was sorting of different types: https://github.com/SethMMorton/natsort/issues/7 | |
1162 | .. _A rather unexpected special case I encountered was sorting collections containing NaN: | |
1163 | https://github.com/SethMMorton/natsort/issues/27 | |
1164 | .. _It's hard to compare floating point numbers: http://www.drdobbs.com/cpp/its-hard-to-compare-floating-point-numbe/240149806 | |
1165 | .. _caught a bit off guard when the request was initially made: https://github.com/SethMMorton/natsort/issues/14 | |
1166 | .. _at the code: https://github.com/SethMMorton/natsort/tree/master/natsort | |
1167 | .. _natsort.py: https://github.com/SethMMorton/natsort/blob/master/natsort/natsort.py | |
1168 | .. _util.py: https://github.com/SethMMorton/natsort/blob/master/natsort/util.py | |
1169 | .. _although they do point out in the documentation that it will be painful to use: | |
1170 | https://docs.python.org/3/library/locale.html#background-details-hints-tips-and-caveats | |
1171 | .. _natsort.compat.locale.py: https://github.com/SethMMorton/natsort/blob/master/natsort/compat/locale.py | |
1172 | .. _Thousands separator support: https://github.com/SethMMorton/natsort/issues/36 | |
1173 | .. _really good: https://hypothesis.readthedocs.io/en/latest/ | |
1174 | .. _testing strategy: https://docs.pytest.org/en/latest/ | |
1175 | .. _check out some official Unicode documentation: https://unicode.org/reports/tr15/ | |
1176 | .. _Check out this issue for a real-world example: https://github.com/SethMMorton/natsort/issues/140⏎ | |
14 | This page has been moved to the | |
15 | `natsort wiki <https://github.com/SethMMorton/natsort/wiki/How-Does-Natsort-Work%3F#special-cases-everywhere>`_. |
5 | 5 | Possible Issues with :func:`~natsort.humansorted` or ``ns.LOCALE`` |
6 | 6 | ================================================================== |
7 | 7 | |
8 | Being Locale-Aware Means Both Numbers and Non-Numbers | |
9 | ----------------------------------------------------- | |
10 | ||
11 | In addition to modifying how characters are sorted, ``ns.LOCALE`` will take | |
12 | into account locale-dependent thousands separators (and locale-dependent | |
13 | decimal separators if ``ns.FLOAT`` is enabled). This means that if you are in a | |
14 | locale that uses commas as the thousands separator, a number like | |
15 | ``123,456`` will be interpreted as ``123456``. If this is not what you want, | |
16 | you may consider using ``ns.LOCALEALPHA`` which will only enable locale-aware | |
17 | sorting for non-numbers (similarly, ``ns.LOCALENUM`` enables locale-aware | |
18 | sorting only for numbers). | |
19 | ||
20 | Regenerate Key With :func:`~natsort.natsort_keygen` After Changing Locale | |
21 | ------------------------------------------------------------------------- | |
22 | ||
23 | When :func:`~natsort.natsort_keygen` is called it returns a key function that | |
24 | hard-codes the provided settings. This means that the key returned when | |
25 | ``ns.LOCALE`` is used contains the settings specified by the locale | |
26 | *loaded at the time the key is generated*. If you change the locale, | |
27 | you should regenerate the key to account for the new locale. | |
28 | ||
29 | Corollary: Do Not Reuse :func:`~natsort.natsort_keygen` After Changing Locale | |
30 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | |
31 | ||
32 | If you change locale, the old function will not work as expected. | |
33 | The :mod:`locale` library works with a global state. When | |
34 | :func:`~natsort.natsort_keygen` is called it does the best job that it can to | |
35 | make the returned function as static as possible and independent of the global | |
36 | state, but the :func:`locale.strxfrm` function must access this global state to | |
37 | work; therefore, if you change locale and use ``ns.LOCALE`` then you should | |
38 | discard the old key. | |
39 | ||
40 | .. note:: If you use `PyICU`_ then you may be able to reuse keys after changing | |
41 | locale. | |
42 | ||
43 | The :mod:`locale` Module From the StdLib Has Issues | |
44 | --------------------------------------------------- | |
45 | ||
46 | :mod:`natsort` will use `PyICU`_ for :func:`~natsort.humansorted` or | |
47 | ``ns.LOCALE`` if it is installed. If not, it will fall back on the | |
48 | :mod:`locale` library from the Python stdlib. If you do not have `PyICU`_ | |
49 | installed, please keep the following known problems and issues in mind. | |
50 | ||
51 | .. note:: Remember, if you have `PyICU`_ installed you shouldn't need to worry | |
52 | about any of these. | |
53 | ||
54 | Explicitly Set the Locale Before Using ``ns.LOCALE`` | |
55 | ++++++++++++++++++++++++++++++++++++++++++++++++++++ | |
56 | ||
57 | I have found that unless you explicitly set a locale, the sorted order may not | |
58 | be what you expect. Setting this is straightforward | |
59 | (in the below example I use 'en_US.UTF-8', but you should use your | |
60 | locale): | |
61 | ||
62 | .. code-block:: pycon | |
63 | ||
64 | >>> import locale | |
65 | >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') | |
66 | 'en_US.UTF-8' | |
67 | ||
68 | .. _bug_note: | |
69 | ||
70 | The :mod:`locale` Module Is Broken on Mac OS X | |
71 | ++++++++++++++++++++++++++++++++++++++++++++++ | |
72 | ||
73 | It's not Python's fault, but the OS... the locale library for BSD-based systems | |
74 | (of which Mac OS X is one) is broken. See the following links: | |
75 | ||
76 | - https://stackoverflow.com/questions/3412933/python-not-sorting-unicode-properly-strcoll-doesnt-help | |
77 | - https://bugs.python.org/issue23195 | |
78 | - https://github.com/SethMMorton/natsort/issues/21 (contains instructions on installing) | |
79 | - https://stackoverflow.com/questions/33459384/unicode-character-not-in-range-when-calling-locale-strxfrm | |
80 | - https://github.com/SethMMorton/natsort/issues/34 | |
81 | ||
82 | Of course, installing `PyICU`_ fixes this, but if you don't want to or cannot | |
83 | install this there is some hope. | |
84 | ||
85 | 1. As of ``natsort`` version 4.0.0, ``natsort`` is configured | |
86 | to compensate for a broken ``locale`` library. When sorting non-numbers | |
87 | it will handle case as you expect, but it will still not be able to | |
88 | comprehend non-ASCII characters properly. Additionally, it has | |
89 | a built-in lookup table of thousands separators that are incorrect | |
90 | on OS X/BSD (but it is possible it is not complete... please file an | |
91 | issue if you see it is not complete) | |
92 | 2. Use "\*.ISO8859-1" locale (i.e. 'en_US.ISO8859-1') rather than | |
93 | "\*.UTF-8" locale. I have found that these have fewer issues than | |
94 | "UTF-8", but your mileage may vary. | |
95 | ||
96 | .. _PyICU: https://pypi.org/project/PyICU | |
8 | This page has been moved to the | |
9 | `natsort wiki <https://github.com/SethMMorton/natsort/wiki/Possible-Issues-with-natsort.humansorted-or-ns.LOCALE>`_. |
5 | 5 | Shell Script |
6 | 6 | ============ |
7 | 7 | |
8 | The ``natsort`` shell script is automatically installed when you install | |
9 | :mod:`natsort` with pip. | |
10 | ||
11 | Below is the usage and some usage examples for the ``natsort`` shell script. | |
12 | ||
13 | Usage | |
14 | ----- | |
15 | ||
16 | .. code-block:: | |
17 | ||
18 | usage: natsort [-h] [--version] [-p] [-f LOW HIGH] [-F LOW HIGH] [-e EXCLUDE] | |
19 | [-r] [-t {digit,int,float,version,ver}] [--nosign] [--noexp] | |
20 | [--locale] | |
21 | [entries [entries ...]] | |
22 | ||
23 | Performs a natural sort on entries given on the command-line. | |
24 | A natural sort sorts numerically then alphabetically, and will sort | |
25 | by numbers in the middle of an entry. | |
26 | ||
27 | positional arguments: | |
28 | entries The entries to sort. Taken from stdin if nothing is | |
29 | given on the command line. | |
30 | ||
31 | optional arguments: | |
32 | -h, --help show this help message and exit | |
33 | --version show program's version number and exit | |
34 | -p, --paths Interpret the input as file paths. This is not | |
35 | strictly necessary to sort all file paths, but in | |
36 | cases where there are OS-generated file paths like | |
37 | "Folder/" and "Folder (1)/", this option is needed to | |
38 | make the paths sorted in the order you expect | |
39 | ("Folder/" before "Folder (1)/"). | |
40 | -f LOW HIGH, --filter LOW HIGH | |
41 | Used for keeping only the entries that have a number | |
42 | falling in the given range. | |
43 | -F LOW HIGH, --reverse-filter LOW HIGH | |
44 | Used for excluding the entries that have a number | |
45 | falling in the given range. | |
46 | -e EXCLUDE, --exclude EXCLUDE | |
47 | Used to exclude an entry that contains a specific | |
48 | number. | |
49 | -r, --reverse Returns in reversed order. | |
50 | -t {digit,int,float,version,ver,real,f,i,r,d}, | |
51 | --number-type {digit,int,float,version,ver,real,f,i,r,d}, | |
52 | --number_type {digit,int,float,version,ver,real,f,i,r,d} | |
53 | Choose the type of number to search for. "float" will | |
54 | search for floating-point numbers. "int" will only | |
55 | search for integers. "digit", "version", and "ver" are | |
56 | synonyms for "int"."real" is a shortcut for "float" | |
57 | with --sign. "i" and "d" are synonyms for "int", "f" | |
58 | is a synonym for "float", and "r" is a synonym for | |
59 | "real".The default is int. | |
60 | --nosign Do not consider "+" or "-" as part of a number, i.e. | |
61 | do not take sign into consideration. This is the | |
62 | default. | |
63 | -s, --sign Consider "+" or "-" as part of a number, i.e. take | |
64 | sign into consideration. The default is unsigned. | |
65 | --noexp Do not consider an exponential as part of a number, | |
66 | i.e. 1e4, would be considered as 1, "e", and 4, not as | |
67 | 10000. This only effects the --number-type=float. | |
68 | -l, --locale Causes natsort to use locale-aware sorting. You will | |
69 | get the best results if you install PyICU. | |
70 | ||
71 | Description | |
72 | ----------- | |
73 | ||
74 | ``natsort`` was originally written to aid in computational chemistry | |
75 | research so that it would be easy to analyze large sets of output files | |
76 | named after the parameter used: | |
77 | ||
78 | .. code-block:: console | |
79 | ||
80 | $ ls *.out | |
81 | mode1000.35.out mode1243.34.out mode744.43.out mode943.54.out | |
82 | ||
83 | (Obviously, in reality there would be more files, but you get the idea.) Notice | |
84 | that the shell sorts in lexicographical order. This is the behavior of programs like | |
85 | ``find`` as well as ``ls``. The problem is passing these files to an | |
86 | analysis program causes them not to appear in numerical order, which can lead | |
87 | to bad analysis. To remedy this, use ``natsort``: | |
88 | ||
89 | .. code-block:: console | |
90 | ||
91 | $ natsort *.out | |
92 | mode744.43.out | |
93 | mode943.54.out | |
94 | mode1000.35.out | |
95 | mode1243.34.out | |
96 | $ natsort -t r *.out | xargs your_program | |
97 | ||
98 | ``-t r`` is short for ``--number-type real``. You can also place natsort in | |
99 | the middle of a pipe: | |
100 | ||
101 | .. code-block:: console | |
102 | ||
103 | $ find . -name "*.out" | natsort -t r | xargs your_program | |
104 | ||
105 | To sort version numbers, use the default ``--number-type``: | |
106 | ||
107 | .. code-block:: console | |
108 | ||
109 | $ ls * | |
110 | prog-1.10.zip prog-1.9.zip prog-2.0.zip | |
111 | $ natsort * | |
112 | prog-1.9.zip | |
113 | prog-1.10.zip | |
114 | prog-2.0.zip | |
115 | ||
116 | In general, all ``natsort`` shell script options mirror the :func:`~natsorted` | |
117 | API, with the notable exception of the ``--filter``, ``--reverse-filter``, and ``--exclude`` | |
118 | options. These three options are used as follows: | |
119 | ||
120 | .. code-block:: console | |
121 | ||
122 | $ ls *.out | |
123 | mode1000.35.out mode1243.34.out mode744.43.out mode943.54.out | |
124 | $ natsort -t r *.out -f 900 1100 # Select only numbers between 900-1100 | |
125 | mode943.54.out | |
126 | mode1000.35.out | |
127 | $ natsort -t r *.out -F 900 1100 # Select only numbers NOT between 900-1100 | |
128 | mode744.43.out | |
129 | mode1243.34.out | |
130 | $ natsort -t r *.out -e 1000.35 # Exclude 1000.35 from search | |
131 | mode744.43.out | |
132 | mode943.54.out | |
133 | mode1243.34.out | |
134 | ||
135 | If you are sorting paths with OS-generated filenames, you may require the | |
136 | ``--paths``/``-p`` option: | |
137 | ||
138 | .. code-block:: console | |
139 | ||
140 | $ find . ! -path . -type f | |
141 | ./folder/file (1).txt | |
142 | ./folder/file.txt | |
143 | ./folder (1)/file.txt | |
144 | ./folder (10)/file.txt | |
145 | ./folder (2)/file.txt | |
146 | $ find . ! -path . -type f | natsort | |
147 | ./folder (1)/file.txt | |
148 | ./folder (2)/file.txt | |
149 | ./folder (10)/file.txt | |
150 | ./folder/file (1).txt | |
151 | ./folder/file.txt | |
152 | $ find . ! -path . -type f | natsort -p | |
153 | ./folder/file.txt | |
154 | ./folder/file (1).txt | |
155 | ./folder (1)/file.txt | |
156 | ./folder (2)/file.txt | |
157 | ./folder (10)/file.txt | |
8 | This page has been moved to the | |
9 | `natsort wiki <https://github.com/SethMMorton/natsort/wiki/Shell-Script>`_. |
22 | 22 | from natsort.ns_enum import NSType, ns |
23 | 23 | from natsort.utils import KeyType, NatsortInType, NatsortOutType, chain_functions |
24 | 24 | |
25 | __version__ = "8.2.0" | |
25 | __version__ = "8.3.1" | |
26 | 26 | |
27 | 27 | __all__ = [ |
28 | 28 | "natsort_key", |
3 | 3 | from the fastnumbers module in the event that module is not installed. |
4 | 4 | """ |
5 | 5 | import unicodedata |
6 | from typing import Callable, FrozenSet, Optional, Union | |
6 | from typing import Callable, FrozenSet, Union | |
7 | 7 | |
8 | 8 | from natsort.unicode_numbers import decimal_chars |
9 | 9 | |
34 | 34 | StrOrInt = Union[str, int] |
35 | 35 | |
36 | 36 | |
37 | # noinspection PyIncorrectDocstring | |
38 | 37 | def fast_float( |
39 | 38 | x: str, |
40 | key: Callable[[str], StrOrFloat] = lambda x: x, | |
41 | nan: Optional[StrOrFloat] = None, | |
39 | key: Callable[[str], str] = lambda x: x, | |
40 | nan: float = float("inf"), | |
42 | 41 | _uni: Callable[[str, StrOrFloat], StrOrFloat] = unicodedata.numeric, |
43 | 42 | _nan_inf: FrozenSet[str] = NAN_INF, |
44 | 43 | _first_char: FrozenSet[str] = POTENTIAL_FIRST_CHAR, |
55 | 54 | String to attempt to convert to a float. |
56 | 55 | key : callable |
57 | 56 | Single-argument function to apply to *x* if conversion fails. |
58 | nan : object | |
57 | nan : float | |
59 | 58 | Value to return instead of NaN if NaN would be returned. |
60 | 59 | |
61 | 60 | Returns |
66 | 65 | if x[0] in _first_char or x.lstrip()[:3] in _nan_inf: |
67 | 66 | try: |
68 | 67 | ret = float(x) |
69 | return nan if nan is not None and ret != ret else ret | |
68 | return nan if ret != ret else ret | |
70 | 69 | except ValueError: |
71 | 70 | try: |
72 | 71 | return _uni(x, key(x)) if len(x) == 1 else key(x) |
79 | 78 | return key(x) |
80 | 79 | |
81 | 80 | |
82 | # noinspection PyIncorrectDocstring | |
83 | 81 | def fast_int( |
84 | 82 | x: str, |
85 | key: Callable[[str], StrOrInt] = lambda x: x, | |
83 | key: Callable[[str], str] = lambda x: x, | |
86 | 84 | _uni: Callable[[str, StrOrInt], StrOrInt] = unicodedata.digit, |
87 | 85 | _first_char: FrozenSet[str] = POTENTIAL_FIRST_CHAR, |
88 | 86 | ) -> StrOrInt: |
3 | 3 | having to worry if it is actually installed. |
4 | 4 | """ |
5 | 5 | import re |
6 | from typing import Callable, Iterable, Iterator, Tuple, Union | |
6 | 7 | |
7 | __all__ = ["fast_float", "fast_int"] | |
8 | StrOrFloat = Union[str, float] | |
9 | StrOrInt = Union[str, int] | |
10 | ||
11 | __all__ = ["try_float", "try_int"] | |
8 | 12 | |
9 | 13 | |
10 | def is_supported_fastnumbers(fastnumbers_version: str) -> bool: | |
14 | def is_supported_fastnumbers( | |
15 | fastnumbers_version: str, minimum: Tuple[int, int, int] = (2, 0, 0) | |
16 | ) -> bool: | |
11 | 17 | match = re.match( |
12 | 18 | r"^(\d+)\.(\d+)(\.(\d+))?([ab](\d+))?$", |
13 | 19 | fastnumbers_version, |
21 | 27 | |
22 | 28 | (major, minor, patch) = match.group(1, 2, 4) |
23 | 29 | |
24 | return (int(major), int(minor), int(patch)) >= (2, 0, 0) | |
30 | return (int(major), int(minor), int(patch)) >= minimum | |
25 | 31 | |
26 | 32 | |
27 | 33 | # If the user has fastnumbers installed, they will get great speed |
33 | 39 | # Require >= version 2.0.0. |
34 | 40 | if not is_supported_fastnumbers(fn_ver): |
35 | 41 | raise ImportError # pragma: no cover |
42 | ||
43 | # For versions of fastnumbers with mapping capability, use that | |
44 | if is_supported_fastnumbers(fn_ver, (5, 0, 0)): | |
45 | del fast_float, fast_int | |
46 | from fastnumbers import try_float, try_int | |
36 | 47 | except ImportError: |
37 | 48 | from natsort.compat.fake_fastnumbers import fast_float, fast_int # type: ignore |
49 | ||
50 | # Re-map the old-or-compatibility functions fast_float/fast_int to the | |
51 | # newer API of try_float/try_int. If we already imported try_float/try_int | |
52 | # then there is nothing to do. | |
53 | if "try_float" not in globals(): | |
54 | ||
55 | def try_float( # type: ignore[no-redef] # noqa: F811 | |
56 | x: Iterable[str], | |
57 | map: bool, | |
58 | nan: float = float("inf"), | |
59 | on_fail: Callable[[str], str] = lambda x: x, | |
60 | ) -> Iterator[StrOrFloat]: | |
61 | assert map is True | |
62 | return (fast_float(y, nan=nan, key=on_fail) for y in x) | |
63 | ||
64 | ||
65 | if "try_int" not in globals(): | |
66 | ||
67 | def try_int( # type: ignore[no-redef] # noqa: F811 | |
68 | x: Iterable[str], | |
69 | map: bool, | |
70 | on_fail: Callable[[str], str] = lambda x: x, | |
71 | ) -> Iterator[StrOrInt]: | |
72 | assert map is True | |
73 | return (fast_int(y, key=on_fail) for y in x) |
8 | 8 | import platform |
9 | 9 | from functools import partial |
10 | 10 | from operator import itemgetter |
11 | from pathlib import PurePath | |
11 | 12 | from typing import ( |
12 | 13 | Any, |
13 | 14 | Callable, |
286 | 287 | ['num2', 'num3', 'num5'] |
287 | 288 | |
288 | 289 | """ |
290 | if alg & ns.PRESORT: | |
291 | seq = sorted(seq, reverse=reverse, key=str) | |
289 | 292 | return sorted(seq, reverse=reverse, key=natsort_keygen(key, alg)) |
290 | 293 | |
291 | 294 | |
475 | 478 | |
476 | 479 | # Pair the index and sequence together, then sort by element |
477 | 480 | index_seq_pair = [(x, y) for x, y in enumerate(seq)] |
481 | if alg & ns.PRESORT: | |
482 | index_seq_pair.sort(reverse=reverse, key=lambda x: str(itemgetter(1)(x))) | |
478 | 483 | index_seq_pair.sort(reverse=reverse, key=natsort_keygen(newkey, alg)) |
479 | 484 | return [x for x, _ in index_seq_pair] |
480 | 485 | |
668 | 673 | ) -> Iterator[str]: |
669 | 674 | if key is not None: |
670 | 675 | v = key(v) |
671 | return utils.path_splitter(str(v)) | |
676 | if not isinstance(v, (str, PurePath)): | |
677 | v = str(v) | |
678 | return utils.path_splitter(v) | |
672 | 679 | |
673 | 680 | |
674 | 681 | # Choose the implementation based on the host OS |
675 | 682 | if platform.system() == "Windows": |
676 | ||
677 | 683 | from ctypes import wintypes, windll # type: ignore |
678 | 684 | from functools import cmp_to_key |
679 | 685 | |
691 | 697 | ) |
692 | 698 | |
693 | 699 | else: |
694 | ||
695 | 700 | # For UNIX-based platforms, ICU performs MUCH better than locale |
696 | 701 | # at replicating the file explorer's sort order. We will use |
697 | 702 | # ICU's ability to do basic natural sorting as it also better |
766 | 771 | seq: Iterable[T], |
767 | 772 | key: Optional[Callable[[T], NatsortInType]] = None, |
768 | 773 | reverse: bool = False, |
774 | presort: bool = False, | |
769 | 775 | ) -> List[T]: |
770 | 776 | """ |
771 | 777 | Sort elements in the same order as your operating system's file browser |
808 | 814 | Return the list in reversed sorted order. The default is |
809 | 815 | `False`. |
810 | 816 | |
817 | presort : {{True, False}}, optional | |
818 | Equivalent to adding ``ns.PRESORT``, see :class:`ns` for | |
819 | documentation. The default is `False`. | |
820 | ||
811 | 821 | Returns |
812 | 822 | ------- |
813 | 823 | out : list |
823 | 833 | This will implicitly coerce all inputs to str before collating. |
824 | 834 | |
825 | 835 | """ |
826 | return sorted(seq, key=os_sort_keygen(key), reverse=reverse) | |
836 | if presort: | |
837 | seq = sorted(seq, reverse=reverse, key=str) | |
838 | return sorted(seq, reverse=reverse, key=os_sort_keygen(key)) |
113 | 113 | treat these as +Infinity and place them after all the other numbers. |
114 | 114 | By default, a NaN will be treated as -Infinity and be placed first. |
115 | 115 | Note that ``None`` is treated like NaN internally. |
116 | PRESORT, PS | |
117 | Sort the input as strings before sorting with the `natsort` | |
118 | algorithm. This can help eliminate inconsistent sorting in cases | |
119 | where two different strings represent the same number. For example, | |
120 | "a1" and "a01" both are internally represented as ("a", 1), so | |
121 | without `PRESORT` the order of these two values would depend on | |
122 | the order they appeared in the input (because Python's `sorted` | |
123 | is a stable sorting algorithm). | |
116 | 124 | |
117 | 125 | Notes |
118 | 126 | ----- |
142 | 150 | NANLAST = NL = 1 << next(_counter) |
143 | 151 | COMPATIBILITYNORMALIZE = CN = 1 << next(_counter) |
144 | 152 | NUMAFTER = NA = 1 << next(_counter) |
153 | PRESORT = PS = 1 << next(_counter) | |
145 | 154 | |
146 | 155 | # Following were previously options but are now defaults. |
147 | 156 | DEFAULT = 0 |
1518 | 1518 | 0x16A67, |
1519 | 1519 | 0x16A68, |
1520 | 1520 | 0x16A69, |
1521 | 0x16AC0, | |
1522 | 0x16AC1, | |
1523 | 0x16AC2, | |
1524 | 0x16AC3, | |
1525 | 0x16AC4, | |
1526 | 0x16AC5, | |
1527 | 0x16AC6, | |
1528 | 0x16AC7, | |
1529 | 0x16AC8, | |
1530 | 0x16AC9, | |
1521 | 1531 | 0x16B50, |
1522 | 1532 | 0x16B51, |
1523 | 1533 | 0x16B52, |
60 | 60 | ) |
61 | 61 | from unicodedata import normalize |
62 | 62 | |
63 | from natsort.compat.fastnumbers import fast_float, fast_int | |
63 | from natsort.compat.fastnumbers import try_float, try_int | |
64 | 64 | from natsort.compat.locale import ( |
65 | 65 | StrOrBytes, |
66 | 66 | get_decimal_point, |
110 | 110 | |
111 | 111 | # For the string component transform factory |
112 | 112 | StrBytesNum = Union[str, bytes, float, int] |
113 | StrTransformer = Callable[[str], StrBytesNum] | |
113 | StrTransformer = Callable[[Iterable[str]], Iterator[StrBytesNum]] | |
114 | 114 | |
115 | 115 | # For the final data transform factory |
116 | 116 | FinalTransform = AnyTuple |
328 | 328 | -------- |
329 | 329 | parse_string_factory |
330 | 330 | parse_bytes_factory |
331 | parse_number_factory | |
331 | parse_number_or_none_factory | |
332 | 332 | |
333 | 333 | """ |
334 | 334 | |
336 | 336 | if key is not None: |
337 | 337 | val = key(val) |
338 | 338 | |
339 | # Assume the input are strings, which is the most common case | |
340 | try: | |
341 | return string_func(cast(str, val)) | |
342 | except (TypeError, AttributeError): | |
343 | ||
344 | # If bytes type, use the bytes_func | |
345 | if type(val) in (bytes,): | |
346 | return bytes_func(cast(bytes, val)) | |
347 | ||
348 | # Otherwise, assume it is an iterable that must be parsed recursively. | |
349 | # Do not apply the key recursively. | |
350 | try: | |
351 | return tuple( | |
352 | natsort_key(x, None, string_func, bytes_func, num_func) | |
353 | for x in cast(Iterable[Any], val) | |
354 | ) | |
355 | ||
356 | # If that failed, it must be a number. | |
357 | except TypeError: | |
358 | return num_func(val) | |
339 | if isinstance(val, (str, PurePath)): | |
340 | return string_func(val) | |
341 | elif isinstance(val, bytes): | |
342 | return bytes_func(val) | |
343 | elif isinstance(val, Iterable): | |
344 | # Must be parsed recursively, but do not apply the key recursively. | |
345 | return tuple( | |
346 | natsort_key(x, None, string_func, bytes_func, num_func) for x in val | |
347 | ) | |
348 | else: # Anything else goes here | |
349 | return num_func(val) | |
359 | 350 | |
360 | 351 | |
361 | 352 | def parse_bytes_factory(alg: NSType) -> BytesTransformer: |
425 | 416 | nan_replace = float("+inf") if alg & ns.NANLAST else float("-inf") |
426 | 417 | |
427 | 418 | def func( |
428 | val: Any, _nan_replace: float = nan_replace, _sep: StrOrBytes = sep | |
419 | val: Any, | |
420 | _nan_replace: float = nan_replace, | |
421 | _sep: StrOrBytes = sep, | |
422 | reverse: bool = nan_replace == float("+inf"), | |
429 | 423 | ) -> BasicTuple: |
430 | 424 | """Given a number, place it in a tuple with a leading null string.""" |
431 | return _sep, (_nan_replace if val != val or val is None else val) | |
425 | # Add a trailing string to values that map to _nan_replace. This fixes | |
426 | # the ordering between NaN, None, and the NaN replacement value itself: | |
427 | # NaN, then None, then the replacement value (reversed when NaN is last). | |
428 | if val != val: | |
429 | return _sep, _nan_replace, "3" if reverse else "1" | |
430 | elif val is None: | |
431 | return _sep, _nan_replace, "2" | |
432 | elif val == _nan_replace: | |
433 | return _sep, _nan_replace, "1" if reverse else "3" | |
434 | else: | |
435 | return _sep, val | |
432 | 436 | |
433 | 437 | # Return the function, possibly wrapping in tuple if PATH is selected. |
434 | 438 | if alg & ns.PATH and alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA: |
513 | 517 | c = compose_input(b) # Decompose unicode if using LOCALE |
514 | 518 | d = splitter(c) # Split string into components. |
515 | 519 | e = filter(None, d) # Remove empty strings. |
516 | f = map(component_transform, e) # Apply transform on components. | |
520 | f = component_transform(e) # Apply transform on components. | |
517 | 521 | g = sep_inserter(f, sep) # Insert '' between numbers. |
518 | 522 | return final_transform(g, original) # Apply the final transform. |
519 | 523 | |
696 | 700 | func_chain.append(get_strxfrm()) |
697 | 701 | |
698 | 702 | # Return the correct chained functions. |
699 | kwargs: Dict[str, Union[float, Callable[[str], StrOrBytes]]] | |
700 | kwargs = {"key": chain_functions(func_chain)} if func_chain else {} | |
703 | kwargs: Dict[str, Union[float, Callable[[str], StrOrBytes], bool]] | |
704 | kwargs = {"on_fail": chain_functions(func_chain)} if func_chain else {} | |
705 | kwargs["map"] = True | |
701 | 706 | if alg & ns.FLOAT: |
702 | # noinspection PyTypeChecker | |
703 | 707 | kwargs["nan"] = nan_val |
704 | return cast(Callable[[str], StrOrBytes], partial(fast_float, **kwargs)) | |
708 | return cast(StrTransformer, partial(try_float, **kwargs)) | |
705 | 709 | else: |
706 | return cast(Callable[[str], StrOrBytes], partial(fast_int, **kwargs)) | |
710 | return cast(StrTransformer, partial(try_int, **kwargs)) | |
707 | 711 | |
708 | 712 | |
709 | 713 | def final_data_transform_factory( |
863 | 867 | *s* if *s* was not *bytes*. |
864 | 868 | |
865 | 869 | """ |
866 | try: | |
867 | return cast(bytes, s).decode(encoding) | |
868 | except (AttributeError, TypeError): | |
870 | if isinstance(s, bytes): | |
871 | return s.decode(encoding) | |
872 | else: | |
869 | 873 | return s |
870 | 874 | |
871 | 875 |
0 | 0 | [bumpversion] |
1 | current_version = 8.2.0 | |
1 | current_version = 8.3.1 | |
2 | 2 | commit = True |
3 | 3 | tag = True |
4 | 4 | tag_name = {new_version} |
11 | 11 | long_description = file: README.rst |
12 | 12 | long_description_content_type = text/x-rst |
13 | 13 | license = MIT |
14 | license_file = LICENSE | |
14 | license_files = LICENSE | |
15 | 15 | classifiers = |
16 | 16 | Development Status :: 5 - Production/Stable |
17 | 17 | Intended Audience :: Developers |
24 | 24 | Natural Language :: English |
25 | 25 | Programming Language :: Python |
26 | 26 | Programming Language :: Python :: 3 |
27 | Programming Language :: Python :: 3.6 | |
28 | 27 | Programming Language :: Python :: 3.7 |
29 | 28 | Programming Language :: Python :: 3.8 |
30 | 29 | Programming Language :: Python :: 3.9 |
31 | 30 | Programming Language :: Python :: 3.10 |
31 | Programming Language :: Python :: 3.11 | |
32 | 32 | Topic :: Scientific/Engineering :: Information Analysis |
33 | 33 | Topic :: Utilities |
34 | 34 | Topic :: Text Processing |
3 | 3 | |
4 | 4 | setup( |
5 | 5 | name="natsort", |
6 | version="8.2.0", | |
6 | version="8.3.1", | |
7 | 7 | packages=find_packages(), |
8 | 8 | entry_points={"console_scripts": ["natsort = natsort.__main__:main"]}, |
9 | python_requires=">=3.6", | |
9 | python_requires=">=3.7", | |
10 | 10 | extras_require={"fast": ["fastnumbers >= 2.0.0"], "icu": ["PyICU >= 1.0.0"]}, |
11 | 11 | package_data={"": ["py.typed"]}, |
12 | 12 | zip_safe=False, |
3 | 3 | """ |
4 | 4 | |
5 | 5 | import unicodedata |
6 | from math import isnan | |
6 | from math import isinf | |
7 | 7 | from typing import Union, cast |
8 | 8 | |
9 | 9 | from hypothesis import given |
61 | 61 | def test_fast_float_converts_float_string_to_float_example() -> None: |
62 | 62 | assert fast_float("45.8") == 45.8 |
63 | 63 | assert fast_float("-45") == -45.0 |
64 | assert fast_float("45.8e-2", key=len) == 45.8e-2 | |
65 | assert isnan(cast(float, fast_float("nan"))) | |
66 | assert isnan(cast(float, fast_float("+nan"))) | |
67 | assert isnan(cast(float, fast_float("-NaN"))) | |
64 | assert fast_float("45.8e-2", key=lambda x: x.upper()) == 45.8e-2 | |
65 | assert isinf(cast(float, fast_float("nan"))) | |
66 | assert isinf(cast(float, fast_float("+nan"))) | |
67 | assert isinf(cast(float, fast_float("-NaN"))) | |
68 | 68 | assert fast_float("۱۲.۱۲") == 12.12 |
69 | 69 | assert fast_float("-۱۲.۱۲") == -12.12 |
70 | 70 | |
84 | 84 | |
85 | 85 | |
86 | 86 | def test_fast_float_with_key_applies_to_string_example() -> None: |
87 | assert fast_float("invalid", key=len) == len("invalid") | |
87 | assert fast_float("invalid", key=lambda x: x.upper()) == "INVALID" | |
88 | 88 | |
89 | 89 | |
90 | 90 | @given(text().filter(not_a_float).filter(bool)) |
91 | 91 | def test_fast_float_with_key_applies_to_string(x: str) -> None: |
92 | assert fast_float(x, key=len) == len(x) | |
92 | assert fast_float(x, key=lambda x: x.upper()) == x.upper() | |
93 | 93 | |
94 | 94 | |
95 | 95 | def test_fast_int_leaves_float_string_as_is_example() -> None: |
125 | 125 | |
126 | 126 | |
127 | 127 | def test_fast_int_with_key_applies_to_string_example() -> None: |
128 | assert fast_int("invalid", key=len) == len("invalid") | |
128 | assert fast_int("invalid", key=lambda x: x.upper()) == "INVALID" | |
129 | 129 | |
130 | 130 | |
131 | 131 | @given(text().filter(not_an_int).filter(bool)) |
132 | 132 | def test_fast_int_with_key_applies_to_string(x: str) -> None: |
133 | assert fast_int(x, key=len) == len(x) | |
133 | assert fast_int(x, key=lambda x: x.upper()) == x.upper() |
3 | 3 | See the README or the natsort homepage for more details. |
4 | 4 | """ |
5 | 5 | |
6 | import math | |
6 | 7 | from operator import itemgetter |
7 | 8 | from pathlib import PurePosixPath |
8 | 9 | from typing import List, Tuple, Union |
109 | 110 | |
110 | 111 | |
111 | 112 | @pytest.mark.parametrize( |
112 | "alg, expected, slc", | |
113 | [ | |
114 | (ns.DEFAULT, [float("nan"), 5, "25", 1e40], slice(1, None)), | |
115 | (ns.NANLAST, [5, "25", 1e40, float("nan")], slice(None, 3)), | |
116 | ], | |
117 | ) | |
118 | def test_natsorted_handles_nan( | |
119 | alg: NSType, expected: List[Union[str, float, int]], slc: slice | |
120 | ) -> None: | |
121 | given: List[Union[str, float, int]] = ["25", 5, float("nan"), 1e40] | |
122 | # The slice is because NaN != NaN | |
123 | # noinspection PyUnresolvedReferences | |
124 | assert natsorted(given, alg=alg)[slc] == expected[slc] | |
113 | "alg, expected", | |
114 | [ | |
115 | (ns.DEFAULT, [float("nan"), None, float("-inf"), 5, "25", 1e40, float("inf")]), | |
116 | (ns.NANLAST, [float("-inf"), 5, "25", 1e40, float("inf"), None, float("nan")]), | |
117 | ], | |
118 | ) | |
119 | def test_natsorted_consistent_ordering_with_nan_and_friends( | |
120 | alg: NSType, expected: List[Union[str, float, None, int]] | |
121 | ) -> None: | |
122 | sentinel = math.pi | |
123 | expected = [sentinel if x != x else x for x in expected] | |
124 | given: List[Union[str, float, None, int]] = [ | |
125 | float("inf"), | |
126 | float("-inf"), | |
127 | "25", | |
128 | 5, | |
129 | float("nan"), | |
130 | 1e40, | |
131 | None, | |
132 | ] | |
133 | result = natsorted(given, alg=alg) | |
134 | result = [sentinel if x != x else x for x in result] | |
135 | assert result == expected | |
125 | 136 | |
126 | 137 | |
127 | 138 | def test_natsorted_with_mixed_bytes_and_str_input_raises_type_error() -> None: |
366 | 377 | "street ۱۲", |
367 | 378 | ] |
368 | 379 | assert natsorted(given, alg=ns.IGNORECASE) == expected |
380 | ||
381 | ||
382 | def test_natsort_sorts_consistently_with_presort() -> None: | |
383 | # Demonstrate the problem: | |
384 | # Sorting is order-dependent for values that have different | |
385 | # string representations but are equivalent numerically. | |
386 | given = ["a01", "a1.4500", "a1", "a1.45"] | |
387 | expected = ["a01", "a1", "a1.4500", "a1.45"] | |
388 | result = natsorted(given, alg=ns.FLOAT) | |
389 | assert result == expected | |
390 | ||
391 | given = ["a1", "a1.45", "a01", "a1.4500"] | |
392 | expected = ["a1", "a01", "a1.45", "a1.4500"] | |
393 | result = natsorted(given, alg=ns.FLOAT) | |
394 | assert result == expected | |
395 | ||
396 | # The solution - use "presort" which will sort the | |
397 | # input by its string representation before sorting | |
398 | # with natsorted, which gives consistent results even | |
399 | # if the numeric representation is identical | |
400 | expected = ["a01", "a1", "a1.45", "a1.4500"] | |
401 | ||
402 | given = ["a01", "a1.4500", "a1", "a1.45"] | |
403 | result = natsorted(given, alg=ns.FLOAT | ns.PRESORT) | |
404 | assert result == expected | |
405 | ||
406 | given = ["a1", "a1.45", "a01", "a1.4500"] | |
407 | result = natsorted(given, alg=ns.FLOAT | ns.PRESORT) | |
408 | assert result == expected |
87 | 87 | assert index_natsorted(given, key=itemgetter(1)) == expected |
88 | 88 | |
89 | 89 | |
90 | def test_index_natsorted_can_presort() -> None: | |
91 | expected = [2, 0, 3, 1] | |
92 | given = ["a1", "a1.4500", "a01", "a1.45"] | |
93 | result = index_natsorted(given, alg=ns.FLOAT | ns.PRESORT) | |
94 | assert result == expected | |
95 | ||
96 | ||
90 | 97 | def test_index_realsorted_is_identical_to_index_natsorted_with_real_alg( |
91 | 98 | float_list: List[str], |
92 | 99 | ) -> None: |
17 | 17 | ("NANLAST", 0x0400), |
18 | 18 | ("COMPATIBILITYNORMALIZE", 0x0800), |
19 | 19 | ("NUMAFTER", 0x1000), |
20 | ("PRESORT", 0x2000), | |
20 | 21 | ("DEFAULT", 0x0000), |
21 | 22 | ("INT", 0x0000), |
22 | 23 | ("UNSIGNED", 0x0000), |
41 | 42 | ("NL", 0x0400), |
42 | 43 | ("CN", 0x0800), |
43 | 44 | ("NA", 0x1000), |
45 | ("PS", 0x2000), | |
44 | 46 | ], |
45 | 47 | ) |
46 | 48 | def test_ns_enum(given: str, expected: int) -> None: |
43 | 43 | given = ["foo0", "foo2", "goo1"] |
44 | 44 | expected = ["foo0", "goo1", "foo2"] |
45 | 45 | result = natsort.os_sorted(given, key=lambda x: x.replace("g", "f")) |
46 | assert result == expected | |
47 | ||
48 | ||
49 | def test_os_sorted_can_presort() -> None: | |
50 | given = ["a1", "a01"] | |
51 | expected = ["a01", "a1"] | |
52 | result = natsort.os_sorted(given, presort=True) | |
46 | 53 | assert result == expected |
47 | 54 | |
48 | 55 |
19 | 19 | (ns.PATH | ns.UNGROUPLETTERS | ns.LOCALE, lambda x: ((("xx",), ("", x)),)), |
20 | 20 | ], |
21 | 21 | ) |
22 | @given(x=floats(allow_nan=False) | integers()) | |
22 | @given(x=floats(allow_nan=False, allow_infinity=False) | integers()) | |
23 | 23 | def test_parse_number_factory_makes_function_that_returns_tuple( |
24 | 24 | x: Union[float, int], alg: NSType, example_func: NumTransformer |
25 | 25 | ) -> None: |
31 | 31 | "alg, x, result", |
32 | 32 | [ |
33 | 33 | (ns.DEFAULT, 57, ("", 57)), |
34 | (ns.DEFAULT, float("nan"), ("", float("-inf"))), # NaN transformed to -infinity | |
35 | (ns.NANLAST, float("nan"), ("", float("+inf"))), # NANLAST makes it +infinity | |
36 | (ns.DEFAULT, None, ("", float("-inf"))), # None transformed to -infinity | |
37 | (ns.NANLAST, None, ("", float("+inf"))), # NANLAST makes it +infinity | |
34 | ( | |
35 | ns.DEFAULT, | |
36 | float("nan"), | |
37 | ("", float("-inf"), "1"), | |
38 | ), # NaN transformed to -infinity | |
39 | ( | |
40 | ns.NANLAST, | |
41 | float("nan"), | |
42 | ("", float("+inf"), "3"), | |
43 | ), # NANLAST makes it +infinity | |
44 | (ns.DEFAULT, None, ("", float("-inf"), "2")), # None transformed to -infinity | |
45 | (ns.NANLAST, None, ("", float("+inf"), "2")), # NANLAST makes it +infinity | |
46 | (ns.DEFAULT, float("-inf"), ("", float("-inf"), "3")), | |
47 | (ns.NANLAST, float("+inf"), ("", float("+inf"), "1")), | |
38 | 48 | ], |
39 | 49 | ) |
40 | 50 | def test_parse_number_factory_treats_nan_and_none_special( |
6 | 6 | import pytest |
7 | 7 | from hypothesis import given |
8 | 8 | from hypothesis.strategies import floats, integers, lists, text |
9 | from natsort.compat.fastnumbers import fast_float | |
9 | from natsort.compat.fastnumbers import try_float | |
10 | 10 | from natsort.ns_enum import NSType, NS_DUMB, ns |
11 | 11 | from natsort.utils import ( |
12 | 12 | FinalTransform, |
45 | 45 | sep, |
46 | 46 | NumRegex.int_nosign().split, |
47 | 47 | input_transform, |
48 | fast_float, | |
48 | lambda x: try_float(x, map=True), | |
49 | 49 | final_transform, |
50 | 50 | ) |
51 | 51 |
4 | 4 | from typing import Any, Callable, FrozenSet, Union |
5 | 5 | |
6 | 6 | import pytest |
7 | from hypothesis import example, given | |
7 | from hypothesis import assume, example, given | |
8 | 8 | from hypothesis.strategies import floats, integers, text |
9 | from natsort.compat.fastnumbers import fast_float, fast_int | |
9 | from natsort.compat.fastnumbers import try_float, try_int | |
10 | 10 | from natsort.compat.locale import get_strxfrm |
11 | 11 | from natsort.ns_enum import NSType, NS_DUMB, ns |
12 | 12 | from natsort.utils import groupletters, string_component_transform_factory |
31 | 31 | return "\0" not in x |
32 | 32 | |
33 | 33 | |
34 | def input_is_ok_with_locale(x: str) -> bool: | |
35 | """Ensure this input won't cause locale.strxfrm to barf""" | |
36 | # On FreeBSD, locale.strxfrm raises an OSError on input like 'Å'. | |
37 | # You read that right - an *OSError* for invalid input. | |
38 | # We cannot really fix that, so we just filter out any value | |
39 | # that could cause locale.strxfrm to barf with this function. | |
40 | try: | |
41 | get_strxfrm()(x) | |
42 | except OSError: | |
43 | return False | |
44 | else: | |
45 | return True | |
46 | ||
47 | ||
34 | 48 | @pytest.mark.parametrize( |
35 | 49 | "alg, example_func", |
36 | 50 | [ |
37 | (ns.INT, fast_int), | |
38 | (ns.DEFAULT, fast_int), | |
39 | (ns.FLOAT, partial(fast_float, nan=float("-inf"))), | |
40 | (ns.FLOAT | ns.NANLAST, partial(fast_float, nan=float("+inf"))), | |
41 | (ns.GROUPLETTERS, partial(fast_int, key=groupletters)), | |
42 | (ns.LOCALE, partial(fast_int, key=lambda x: get_strxfrm()(x))), | |
51 | (ns.INT, partial(try_int, map=True)), | |
52 | (ns.DEFAULT, partial(try_int, map=True)), | |
53 | (ns.FLOAT, partial(try_float, map=True, nan=float("-inf"))), | |
54 | (ns.FLOAT | ns.NANLAST, partial(try_float, map=True, nan=float("+inf"))), | |
55 | (ns.GROUPLETTERS, partial(try_int, map=True, on_fail=groupletters)), | |
56 | (ns.LOCALE, partial(try_int, map=True, on_fail=lambda x: get_strxfrm()(x))), | |
43 | 57 | ( |
44 | 58 | ns.GROUPLETTERS | ns.LOCALE, |
45 | partial(fast_int, key=lambda x: get_strxfrm()(groupletters(x))), | |
59 | partial( | |
60 | try_int, map=True, on_fail=lambda x: get_strxfrm()(groupletters(x)) | |
61 | ), | |
46 | 62 | ), |
47 | 63 | ( |
48 | 64 | NS_DUMB | ns.LOCALE, |
49 | partial(fast_int, key=lambda x: get_strxfrm()(groupletters(x))), | |
65 | partial( | |
66 | try_int, map=True, on_fail=lambda x: get_strxfrm()(groupletters(x)) | |
67 | ), | |
50 | 68 | ), |
51 | 69 | ( |
52 | 70 | ns.GROUPLETTERS | ns.LOCALE | ns.FLOAT | ns.NANLAST, |
53 | 71 | partial( |
54 | fast_float, | |
55 | key=lambda x: get_strxfrm()(groupletters(x)), | |
72 | try_float, | |
73 | map=True, | |
74 | on_fail=lambda x: get_strxfrm()(groupletters(x)), | |
56 | 75 | nan=float("+inf"), |
57 | 76 | ), |
58 | 77 | ), |
59 | 78 | ], |
60 | 79 | ) |
61 | 80 | @example(x=float("nan")) |
81 | @example(x="Å") | |
62 | 82 | @given( |
63 | 83 | x=integers() |
64 | 84 | | floats() |
69 | 89 | x: Union[str, float, int], alg: NSType, example_func: Callable[[str], Any] |
70 | 90 | ) -> None: |
71 | 91 | string_component_transform_func = string_component_transform_factory(alg) |
92 | x = str(x) | |
93 | assume(input_is_ok_with_locale(x)) # handle broken locale lib on BSD. | |
72 | 94 | try: |
73 | assert string_component_transform_func(str(x)) == example_func(str(x)) | |
95 | assert list(string_component_transform_func(x)) == list(example_func(x)) | |
74 | 96 | except ValueError as e: # handle broken locale lib on BSD. |
75 | 97 | if "is not in range" not in str(e): |
76 | 98 | raise |
33 | 33 | assert unicodedata.decimal(a, None) is not None |
34 | 34 | |
35 | 35 | |
36 | def test_numeric_chars_contains_all_valid_unicode_numeric_and_digit_characters() -> None: | |
36 | def test_numeric_chars_contains_all_valid_unicode_numeric_and_digit_characters() -> ( | |
37 | None | |
38 | ): | |
37 | 39 | set_numeric_chars = set(numeric_chars) |
38 | 40 | set_digit_chars = set(digit_chars) |
39 | 41 | set_decimal_chars = set(decimal_chars) |
66 | 68 | version of Python. |
67 | 69 | It would be much appreciated if you would submit a Pull Request to the natsort |
68 | 70 | repository (https://github.com/SethMMorton/natsort) with the resulting change. |
69 | """ | |
71 | """, | |
72 | stacklevel=2, | |
70 | 73 | ) |
71 | 74 | |
72 | 75 |
4 | 4 | |
5 | 5 | [tox] |
6 | 6 | envlist = |
7 | flake8, mypy, py36, py37, py38, py39, py310 | |
7 | flake8, mypy, py37, py38, py39, py310, py311 | |
8 | 8 | # Other valid environments are: |
9 | 9 | # docs |
10 | 10 | # release |
24 | 24 | pytest-cov |
25 | 25 | pytest-mock |
26 | 26 | hypothesis |
27 | semver | |
28 | 27 | extras = |
29 | 28 | {env:WITH_EXTRAS:} |
30 | 29 | commands = |
45 | 44 | check-manifest |
46 | 45 | twine |
47 | 46 | commands = |
48 | {envpython} setup.py sdist bdist_wheel | |
47 | {envpython} setup.py sdist | |
48 | pip wheel . -w dist | |
49 | 49 | flake8 |
50 | 50 | check-manifest --ignore ".github*,*.md,.coveragerc" |
51 | 51 | twine check dist/* |
58 | 58 | hypothesis |
59 | 59 | pytest |
60 | 60 | pytest-mock |
61 | fastnumbers | |
61 | fastnumbers>=5.0.1 | |
62 | 62 | typing_extensions |
63 | 63 | commands = |
64 | 64 | mypy --strict natsort tests |
104 | 104 | # Get GitHub actions to run the correct tox environment |
105 | 105 | [gh-actions] |
106 | 106 | python = |
107 | 3.5: py35 | |
108 | 3.6: py36 | |
109 | 107 | 3.7: py37 |
110 | 108 | 3.8: py38 |
111 | 109 | 3.9: py39 |
112 | 110 | 3.10: py310 |
111 | 3.11: py311 |