Merge branch 'new-upstream-release' into 'master'
New upstream release
See merge request debian/natsort!4
Agustin Henze
5 years ago
14 | 14 | |
15 | 15 | ignore_errors = True |
16 | 16 | |
17 | # Files to not perform coverage on | |
18 | omit = | |
19 | natsort/__init__.* | |
20 | natsort/py23compat.* | |
21 | natsort/_version.* |
12 | 12 | sdist |
13 | 13 | develop-eggs |
14 | 14 | .installed.cfg |
15 | .python-version | |
15 | 16 | |
16 | 17 | # We are using MANIFEST.in instead |
17 | 18 | MANIFEST |
24 | 25 | .coverage |
25 | 26 | .tox |
26 | 27 | .cache |
28 | .pytest_cache | |
27 | 29 | .pytest |
30 | .envrc | |
28 | 31 | |
29 | 32 | #Translations |
30 | 33 | *.mo |
31 | 34 | |
32 | 35 | #Mr Developer |
33 | 36 | .mr.developer.cfg |
37 | ||
38 | # PyCharm | |
39 | .idea |
0 | syntax: glob | |
1 | ||
2 | *.py[co] | |
3 | ||
4 | # Packages | |
5 | *.egg | |
6 | *.eggs | |
7 | *.egg-info | |
8 | dist | |
9 | build | |
10 | eggs | |
11 | parts | |
12 | bin | |
13 | var | |
14 | sdist | |
15 | develop-eggs | |
16 | .installed.cfg | |
17 | ||
18 | # We are using MANIFEST.in instead | |
19 | MANIFEST | |
20 | ||
21 | # Installer logs | |
22 | pip-log.txt | |
23 | ||
24 | # Unit test / coverage reports | |
25 | .hypothesis | |
26 | .coverage | |
27 | .tox | |
28 | .cache | |
29 | .pytest | |
30 | ||
31 | #Translations | |
32 | *.mo | |
33 | ||
34 | #Mr Developer | |
35 | .mr.developer.cfg |
0 | 0 | language: python |
1 | python: | |
2 | - 2.6 | |
3 | - 2.7 | |
4 | - 3.2 | |
5 | - 3.3 | |
6 | - 3.4 | |
7 | env: | |
8 | - WITH_OPTIONS=true | |
9 | - WITH_OPTIONS=false | |
10 | before_install: | |
11 | - sudo apt-get update | |
12 | - sudo locale-gen de_DE.UTF-8 | |
13 | - sudo apt-get install bc | |
1 | matrix: | |
2 | include: | |
3 | - python: "2.7" | |
4 | dist: trusty | |
5 | sudo: false | |
6 | env: WITH_EXTRAS="" | |
7 | - python: "2.7" | |
8 | dist: trusty | |
9 | sudo: false | |
10 | env: WITH_EXTRAS="fast,icu" | |
11 | addons: | |
12 | apt: | |
13 | packages: | |
14 | - libicu-dev | |
15 | - language-pack-de | |
16 | - language-pack-en | |
17 | - python: "3.4" | |
18 | dist: trusty | |
19 | sudo: false | |
20 | env: WITH_EXTRAS="" | |
21 | - python: "3.5" | |
22 | dist: trusty | |
23 | sudo: false | |
24 | env: WITH_EXTRAS="" | |
25 | - python: "3.6" | |
26 | dist: trusty | |
27 | sudo: false | |
28 | env: WITH_EXTRAS="" | |
29 | - python: "3.6" | |
30 | dist: trusty | |
31 | sudo: false | |
32 | env: WITH_EXTRAS="fast,icu" | |
33 | addons: | |
34 | apt: | |
35 | packages: | |
36 | - libicu-dev | |
37 | - language-pack-de | |
38 | - language-pack-en | |
39 | - python: "3.7" | |
40 | dist: xenial | |
41 | sudo: true | |
42 | env: WITH_EXTRAS="" | |
43 | ||
14 | 44 | install: |
15 | 45 | - pip install -U pip |
16 | - if [[ $WITH_OPTIONS == true ]]; then sudo apt-get install libicu-dev; fi | |
17 | - if [[ $WITH_OPTIONS == true ]]; then pip install fastnumbers; fi | |
18 | - if [[ $WITH_OPTIONS == true ]]; then pip install PyICU; fi | |
19 | - if [[ 1 -eq $(echo "$TRAVIS_PYTHON_VERSION < 3.4" | bc -l) ]]; then pip install pathlib; fi | |
20 | - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install argparse; fi | |
21 | - if [[ $(echo "$TRAVIS_PYTHON_VERSION < 3.3" | bc -l) ]]; then pip install mock; fi | |
22 | - pip install pytest-cov pytest-flakes pytest-pep8 hypothesis | |
23 | - pip install coveralls | |
46 | - pip install tox-travis codacy-coverage codecov | |
47 | ||
24 | 48 | script: |
25 | - python -m pytest --cov natsort --flakes --pep8 | |
26 | - python -m pytest --doctest-modules natsort | |
27 | - python -m pytest README.rst docs/source/intro.rst docs/source/examples.rst | |
49 | - tox | |
50 | ||
28 | 51 | after_success: |
29 | coveralls | |
52 | - coverage xml | |
53 | - python-codacy-coverage -r coverage.xml | |
54 | - codecov |
0 | # Contributor Covenant Code of Conduct | |
1 | ||
2 | ## Our Pledge | |
3 | ||
4 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. | |
5 | ||
6 | ## Our Standards | |
7 | ||
8 | Examples of behavior that contributes to creating a positive environment include: | |
9 | ||
10 | * Using welcoming and inclusive language | |
11 | * Being respectful of differing viewpoints and experiences | |
12 | * Gracefully accepting constructive criticism | |
13 | * Focusing on what is best for the community | |
14 | * Showing empathy towards other community members | |
15 | ||
16 | Examples of unacceptable behavior by participants include: | |
17 | ||
18 | * The use of sexualized language or imagery and unwelcome sexual attention or advances | |
19 | * Trolling, insulting/derogatory comments, and personal or political attacks | |
20 | * Public or private harassment | |
21 | * Publishing others' private information, such as a physical or electronic address, without explicit permission | |
22 | * Other conduct which could reasonably be considered inappropriate in a professional setting | |
23 | ||
24 | ## Our Responsibilities | |
25 | ||
26 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. | |
27 | ||
28 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. | |
29 | ||
30 | ## Scope | |
31 | ||
32 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. | |
33 | ||
34 | ## Enforcement | |
35 | ||
36 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at drtuba78@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. | |
37 | ||
38 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. | |
39 | ||
40 | ## Attribution | |
41 | ||
42 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] | |
43 | ||
44 | [homepage]: http://contributor-covenant.org | |
45 | [version]: http://contributor-covenant.org/version/1/4/ |
0 | # Contributing | |
1 | ||
2 | If you have an idea for how to improve `natsort`, please contribute! It can | |
3 | be as simple as a bug fix or documentation update, or as complicated as a more | |
4 | robust algorithm. | |
5 | ||
6 | I do not have strong opinions on how one should contribute, so | |
7 | I have copy/pasted some text verbatim from the | |
8 | [Contributor's Guide](http://docs.python-requests.org/en/latest/dev/contributing/) section of | |
9 | [Kenneth Reitz's](http://docs.python-requests.org/en/latest/dev/contributing/) | |
10 | excellent [requests](https://github.com/kennethreitz/requests) library in | |
11 | lieu of coming up with my own. | |
12 | ||
13 | > ### Steps for Submitting Code | |
14 | ||
15 | > When contributing code, you'll want to follow this checklist: | |
16 | ||
17 | > - Fork the repository on GitHub. | |
18 | > - Run the tests to confirm they all pass on your system. | |
19 | If they don't, you'll need to investigate why they fail. | |
20 | If you're unable to diagnose this yourself, | |
21 | raise it as a bug report. | |
22 | > - Write tests that demonstrate your bug or feature. Ensure that they fail. | |
23 | > - Make your change. | |
24 | > - Run the entire test suite again, confirming that all tests pass including the | |
25 | ones you just added. | |
26 | > - Send a GitHub Pull Request to the main repository's master branch. | |
27 | GitHub Pull Requests are the expected method of code collaboration on this project. | |
28 | ||
29 | > ### Documentation Contributions | |
30 | > Documentation improvements are always welcome! The documentation files live in the | |
31 | docs/ directory of the codebase. They're written in | |
32 | [reStructuredText](http://docutils.sourceforge.net/rst.html), and use | |
33 | [Sphinx](http://sphinx-doc.org/index.html) | |
34 | to generate the full suite of documentation. | |
35 | ||
36 | > When contributing documentation, please do your best to follow the style of the | |
37 | documentation files. This means a soft-limit of 79 characters wide in your text | |
38 | files and a semi-formal, yet friendly and approachable, prose style. | |
39 | ||
40 | > When presenting Python code, use single-quoted strings ('hello' instead of "hello"). |
0 | ## Minimal, Complete, and Verifiable Example | |
1 | ||
2 | See https://stackoverflow.com/help/mcve for explanation. | |
3 | ||
4 | ## Error message, Traceback, Desired behavior, Suggestion, Request, or Question |
0 | Copyright (c) 2012-2015 Seth M. Morton | |
0 | Copyright (c) 2012-2018 Seth M. Morton | |
1 | 1 | |
2 | 2 | Permission is hereby granted, free of charge, to any person obtaining a copy of |
3 | 3 | this software and associated documentation files (the "Software"), to deal in |
0 | 0 | include README.rst |
1 | 1 | include LICENSE |
2 | include natsort/natsort.py | |
3 | include natsort/_version.py | |
4 | include natsort/__main__.py | |
5 | include natsort/__init__.py | |
6 | include natsort/locale_help.py | |
7 | include natsort/utils.py | |
8 | include natsort/ns_enum.py | |
9 | include natsort/unicode_numbers.py | |
10 | include natsort/compat/__init__.py | |
11 | include natsort/compat/py23.py | |
12 | include natsort/compat/fake_fastnumbers.py | |
13 | include natsort/compat/fastnumbers.py | |
14 | include natsort/compat/locale.py | |
15 | include natsort/compat/pathlib.py | |
16 | include natsort/compat/pathlib.py | |
17 | include test_natsort/profile_natsorted.py | |
18 | include test_natsort/stress_natsort.py | |
19 | include test_natsort/slow_splitters.py | |
20 | include test_natsort/test_natsort.py | |
21 | include test_natsort/test_locale_help.py | |
22 | include test_natsort/test_fake_fastnumbers.py | |
23 | include test_natsort/test_main.py | |
24 | include test_natsort/test_utils.py | |
25 | include test_natsort/test_unicode_numbers.py | |
26 | include test_natsort/compat/__init__.py | |
27 | include test_natsort/compat/hypothesis.py | |
28 | include test_natsort/compat/locale.py | |
29 | include test_natsort/compat/mock.py | |
2 | include *.md | |
3 | include *.sh | |
4 | include Pipfile | |
30 | 5 | include setup.py |
31 | 6 | include setup.cfg |
32 | prune natsort/__pycache__ | |
33 | graft docs/source | |
7 | include tox.ini | |
8 | include .travis.yml | |
9 | include .coveragerc | |
10 | include .gitignore | |
11 | include .bumpversion.cfg | |
12 | graft docs | |
13 | graft natsort | |
14 | graft test_natsort | |
15 | global-exclude *.py[cod] __pycache__ *.so |
0 | [dev-packages] | |
1 | coverage = "*" | |
2 | pytest = "*" | |
3 | pytest-cov = "*" | |
4 | pytest-flakes = "*" | |
5 | pytest-pep8 = "*" | |
6 | hypothesis = ">=3.8.0" | |
7 | astroid = "==1.5.3" | |
8 | pytest-faulthandler = {version = "*", platform_python_implementation = "== 'CPython'"} | |
9 | ||
10 | # These packages are standard on newer python versions. | |
11 | pathlib = {version = "*", python_version = "< '3.4'"} | |
12 | mock = {version = "*", python_version = "< '3.3'"} |
0 | 0 | natsort |
1 | 1 | ======= |
2 | 2 | |
3 | .. image:: https://travis-ci.org/SethMMorton/natsort.svg?branch=master | |
3 | .. image:: https://img.shields.io/pypi/v/natsort.svg | |
4 | :target: https://pypi.org/project/natsort/ | |
5 | ||
6 | .. image:: https://img.shields.io/pypi/pyversions/natsort.svg | |
7 | :target: https://pypi.org/project/natsort/ | |
8 | ||
9 | .. image:: https://img.shields.io/pypi/l/natsort.svg | |
10 | :target: https://github.com/SethMMorton/natsort/blob/master/LICENSE | |
11 | ||
12 | .. image:: https://img.shields.io/travis/SethMMorton/natsort/master.svg?label=travis-ci | |
4 | 13 | :target: https://travis-ci.org/SethMMorton/natsort |
5 | 14 | |
6 | .. image:: https://coveralls.io/repos/SethMMorton/natsort/badge.png?branch=master | |
7 | :target: https://coveralls.io/r/SethMMorton/natsort?branch=master | |
8 | ||
9 | Natural sorting for python. | |
15 | .. image:: https://codecov.io/gh/SethMMorton/natsort/branch/master/graph/badge.svg | |
16 | :target: https://codecov.io/gh/SethMMorton/natsort | |
17 | ||
18 | .. image:: https://api.codacy.com/project/badge/Grade/f2bf04b1fc5d4792bf546f6e497cf4b8 | |
19 | :target: https://www.codacy.com/app/SethMMorton/natsort | |
20 | ||
21 | Simple yet flexible natural sorting in Python. | |
10 | 22 | |
11 | 23 | - Source Code: https://github.com/SethMMorton/natsort |
12 | - Downloads: https://pypi.python.org/pypi/natsort | |
13 | - Documentation: http://pythonhosted.org/natsort | |
14 | ||
15 | Please see `Moving from older Natsort versions`_ to see if this update requires | |
16 | you to modify your ``natsort`` calls in your code (99% of users will not). | |
24 | - Downloads: https://pypi.org/project/natsort/ | |
25 | - Documentation: http://natsort.readthedocs.io/ | |
26 | ||
27 | - `Examples and Recipes <http://natsort.readthedocs.io/en/master/examples.html>`_ | |
28 | - `How Does Natsort Work? <http://natsort.readthedocs.io/en/master/howitworks.html>`_ | |
29 | - `API <http://natsort.readthedocs.io/en/master/api.html>`_ | |
30 | - **NOTE**: The old documentation at pythonhosted.org has been taken down | |
31 | with no redirects. Please see | |
32 | `this post <https://opensource.stackexchange.com/q/5941/8999>`_ for an | |
33 | explanation of why. | |
34 | ||
35 | - `FAQ`_ | |
36 | - `Optional Dependencies`_ | |
37 | ||
38 | - `fastnumbers <https://pypi.org/project/fastnumbers>`_ >= 2.0.0 | |
39 | - `PyICU <https://pypi.org/project/PyICU>`_ >= 1.0.0 | |
17 | 40 | |
18 | 41 | Quick Description |
19 | 42 | ----------------- |
24 | 47 | |
25 | 48 | .. code-block:: python |
26 | 49 | |
27 | >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] | |
50 | >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] | |
28 | 51 | >>> sorted(a) |
29 | ['a1', 'a10', 'a2', 'a4', 'a9'] | |
52 | ['1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '2 ft 7 in', '7 ft 6 in'] | |
30 | 53 | |
31 | 54 | Notice that it has the order ('1', '10', '2') - this is because the list is |
32 | 55 | being sorted in lexicographical order, which sorts numbers like you would |
33 | 56 | letters (i.e. 'b', 'ba', 'c'). |
34 | 57 | |
35 | ``natsort`` provides a function ``natsorted`` that helps sort lists "naturally", | |
36 | either as real numbers (i.e. signed/unsigned floats or ints), or as versions. | |
58 | ``natsort`` provides a function ``natsorted`` that helps sort lists | |
59 | "naturally" ("naturally" is rather ill-defined, but in general it means | |
60 | sorting based on meaning and not computer code point). | |
37 | 61 | Using ``natsorted`` is simple: |
38 | 62 | |
39 | 63 | .. code-block:: python |
40 | 64 | |
41 | 65 | >>> from natsort import natsorted |
42 | >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] | |
43 | >>> natsorted(a) | |
44 | ['a1', 'a2', 'a4', 'a9', 'a10'] | |
45 | ||
46 | ``natsorted`` identifies real numbers anywhere in a string and sorts them | |
47 | naturally. | |
48 | ||
49 | Sorting versions is handled properly by default (as of ``natsort`` version >= 4.0.0): | |
66 | >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] | |
67 | >>> natsorted(a) | |
68 | ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] | |
69 | ||
70 | ``natsorted`` identifies numbers anywhere in a string and sorts them | |
71 | naturally. Below are some other things you can do with ``natsort`` | |
72 | (also see the `examples <http://natsort.readthedocs.io/en/master/examples.html>`_ | |
73 | for a quick start guide, or the | |
74 | `api <http://natsort.readthedocs.io/en/master/api.html>`_ for complete details). | |
75 | ||
76 | **Note**: ``natsorted`` is designed to be a drop-in replacement for the built-in | |
77 | ``sorted`` function. Like ``sorted``, ``natsorted`` *does not sort in-place*. | |
78 | To sort a list and keep the result under the same name, you must | |
79 | explicitly assign the output back to that variable: | |
80 | ||
81 | .. code-block:: python | |
82 | ||
83 | >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] | |
84 | >>> natsorted(a) | |
85 | ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] | |
86 | >>> print(a) # 'a' was not sorted; "natsorted" simply returned a sorted list | |
87 | ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] | |
88 | >>> a = natsorted(a) # Now 'a' will be sorted because the sorted list was assigned to 'a' | |
89 | >>> print(a) | |
90 | ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] | |
91 | ||
92 | Please see `Generating a Reusable Sorting Key and Sorting In-Place`_ for | |
93 | an alternate way to sort in-place naturally. | |
94 | ||
95 | Examples | |
96 | -------- | |
97 | ||
98 | Sorting Versions | |
99 | ++++++++++++++++ | |
100 | ||
101 | This is handled properly by default (as of ``natsort`` version >= 4.0.0): | |
50 | 102 | |
51 | 103 | .. code-block:: python |
52 | 104 | |
55 | 107 | ['version-1.9', 'version-1.10', 'version-1.11', 'version-2.0'] |
56 | 108 | |
57 | 109 | If you need to sort release candidates, please see |
58 | `this useful hack <http://pythonhosted.org//natsort/examples.htm#rc-sorting>`_ . | |
59 | ||
60 | You can also perform locale-aware sorting (or "human sorting"), where the | |
61 | non-numeric characters are ordered based on their meaning, not on their | |
62 | ordinal value; this can be achieved with the ``humansorted`` function: | |
63 | ||
64 | .. code-block:: python | |
65 | ||
66 | >>> a = ['Apple', 'Banana', 'apple', 'banana'] | |
67 | >>> natsorted(a) | |
68 | ['Apple', 'Banana', 'apple', 'banana'] | |
110 | `this useful hack <http://natsort.readthedocs.io/en/master/examples.html#rc-sorting>`_. | |
111 | ||
112 | Sorting by Real Numbers (i.e. Signed Floats) | |
113 | ++++++++++++++++++++++++++++++++++++++++++++ | |
114 | ||
115 | This is useful in scientific data analysis and was | |
116 | the default behavior of ``natsorted`` for ``natsort`` | |
117 | version < 4.0.0. Use the ``realsorted`` function: | |
118 | ||
119 | .. code-block:: python | |
120 | ||
121 | >>> from natsort import realsorted, ns | |
122 | >>> # Note that when interpreting as signed floats, the below numbers are | |
123 | >>> # +5.10, -3.00, +5.30, +2.00 | |
124 | >>> a = ['position5.10.data', 'position-3.data', 'position5.3.data', 'position2.data'] | |
125 | >>> natsorted(a) | |
126 | ['position2.data', 'position5.3.data', 'position5.10.data', 'position-3.data'] | |
127 | >>> natsorted(a, alg=ns.REAL) | |
128 | ['position-3.data', 'position2.data', 'position5.10.data', 'position5.3.data'] | |
129 | >>> realsorted(a) # shortcut for natsorted with alg=ns.REAL | |
130 | ['position-3.data', 'position2.data', 'position5.10.data', 'position5.3.data'] | |
131 | ||
132 | Locale-Aware Sorting (or "Human Sorting") | |
133 | +++++++++++++++++++++++++++++++++++++++++ | |
134 | ||
135 | This is where the non-numeric characters are also ordered based on their meaning, | |
136 | not on their ordinal value, and a locale-dependent thousands separator and decimal | |
137 | separator are accounted for in the number. | |
138 | This can be achieved with the ``humansorted`` function: | |
139 | ||
140 | .. code-block:: python | |
141 | ||
142 | >>> a = ['Apple', 'apple15', 'Banana', 'apple14,689', 'banana'] | |
143 | >>> natsorted(a) | |
144 | ['Apple', 'Banana', 'apple14,689', 'apple15', 'banana'] | |
69 | 145 | >>> import locale |
70 | 146 | >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') |
71 | 147 | 'en_US.UTF-8' |
148 | >>> natsorted(a, alg=ns.LOCALE) | |
149 | ['apple15', 'apple14,689', 'Apple', 'banana', 'Banana'] | |
72 | 150 | >>> from natsort import humansorted |
73 | >>> humansorted(a) | |
74 | ['apple', 'Apple', 'banana', 'Banana'] | |
151 | >>> humansorted(a) # shortcut for natsorted with alg=ns.LOCALE | |
152 | ['apple15', 'apple14,689', 'Apple', 'banana', 'Banana'] | |
75 | 153 | |
76 | 154 | You may find you need to explicitly set the locale to get this to work |
77 | 155 | (as shown in the example). |
78 | Please see the `following caveat <http://pythonhosted.org//natsort/examples.html#bug-note>`_ | |
79 | and the `Optional Dependencies`_ section | |
80 | below before using the ``humansorted`` function, *especially* if you are on a | |
81 | BSD-based system (like Mac OS X). | |
82 | ||
83 | You can sort signed floats (i.e. real numbers) using the ``realsorted``; this is | |
84 | useful in scientific data analysis. This was the default behavior of ``natsorted`` | |
85 | for ``natsort`` version < 4.0.0: | |
86 | ||
87 | .. code-block:: python | |
88 | ||
89 | >>> from natsort import realsorted | |
90 | >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] | |
91 | >>> natsorted(a) | |
92 | ['num2', 'num5.3', 'num5.10', 'num-3'] | |
93 | >>> realsorted(a) | |
94 | ['num-3', 'num2', 'num5.10', 'num5.3'] | |
156 | Please see `locale issues <http://natsort.readthedocs.io/en/master/locale_issues.html>`_ and the | |
157 | `Optional Dependencies`_ section below before using the ``humansorted`` function. | |
158 | ||
159 | Further Customizing Natsort | |
160 | +++++++++++++++++++++++++++ | |
161 | ||
162 | If you need to combine multiple algorithm modifiers (such as ``ns.REAL``, | |
163 | ``ns.LOCALE``, and ``ns.IGNORECASE``), you can combine the options using the | |
164 | bitwise OR operator (``|``). For example, | |
165 | ||
166 | .. code-block:: python | |
167 | ||
168 | >>> a = ['Apple', 'apple15', 'Banana', 'apple14,689', 'banana'] | |
169 | >>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE) | |
170 | ['Apple', 'apple15', 'apple14,689', 'Banana', 'banana'] | |
171 | >>> # The ns enum provides long and short forms for each option. | |
172 | >>> ns.LOCALE == ns.L | |
173 | True | |
174 | >>> # You can also customize the convenience functions, too. | |
175 | >>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE) == realsorted(a, alg=ns.L | ns.IC) | |
176 | True | |
177 | >>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE) == humansorted(a, alg=ns.R | ns.IC) | |
178 | True | |
179 | ||
180 | All of the available customizations can be found in the documentation for | |
181 | `the ns enum <http://natsort.readthedocs.io/en/master/ns_class.html>`_. | |
182 | ||
183 | You can also add your own custom transformation functions with the ``key`` argument. | |
184 | These can be used with ``alg`` if you wish. | |
185 | ||
186 | .. code-block:: python | |
187 | ||
188 | >>> a = ['apple2.50', '2.3apple'] | |
189 | >>> natsorted(a, key=lambda x: x.replace('apple', ''), alg=ns.REAL) | |
190 | ['2.3apple', 'apple2.50'] | |
191 | ||
192 | Sorting Mixed Types | |
193 | +++++++++++++++++++ | |
95 | 194 | |
96 | 195 | You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types |
97 | 196 | when you sort: |
104 | 203 | >>> # On Python 2, sorted(a) would return [2.0, 6, '4.5', '5', 'a'] |
105 | 204 | >>> # On Python 3, sorted(a) would raise an "unorderable types" TypeError |
106 | 205 | |
107 | ``natsort`` does not officially support the ``bytes`` type on Python 3, but | |
108 | convenience functions are provided that help you decode to ``str`` first: | |
206 | Handling Bytes on Python 3 | |
207 | ++++++++++++++++++++++++++ | |
208 | ||
209 | ``natsort`` does not officially support the ``bytes`` type on Python 3, but | |
210 | convenience functions are provided that help you decode to ``str`` first: | |
109 | 211 | |
110 | 212 | .. code-block:: python |
111 | 213 | |
121 | 223 | >>> natsorted(a, key=as_utf8) == [b'a5', b'a6', b'a40', b'a56'] |
122 | 224 | True |
123 | 225 | |
124 | The natsort algorithm does other fancy things like | |
226 | Generating a Reusable Sorting Key and Sorting In-Place | |
227 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++ | |
228 | ||
229 | Under the hood, ``natsorted`` works by generating a custom sorting | |
230 | key using ``natsort_keygen`` and then passes that to the built-in | |
231 | ``sorted``. You can use the ``natsort_keygen`` function yourself to | |
232 | generate a custom sorting key to sort in-place using the ``list.sort`` | |
233 | method. | |
234 | ||
235 | .. code-block:: python | |
236 | ||
237 | >>> from natsort import natsort_keygen | |
238 | >>> natsort_key = natsort_keygen() | |
239 | >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] | |
240 | >>> natsorted(a) == sorted(a, key=natsort_key) | |
241 | True | |
242 | >>> a.sort(key=natsort_key) | |
243 | >>> a | |
244 | ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] | |
245 | ||
246 | All of the algorithm customizations mentioned in the `Further Customizing Natsort`_ | |
247 | section can also be applied to ``natsort_keygen`` through the *alg* keyword option. | |
248 | ||
249 | Other Useful Things | |
250 | +++++++++++++++++++ | |
125 | 251 | |
126 | 252 | - recursively descend into lists of lists |
127 | - control the case-sensitivity | |
128 | - sort file paths correctly | |
129 | - allow custom sorting keys | |
130 | - exposes a natsort_key generator to pass to ``list.sort`` | |
131 | ||
132 | Please see the package documentation for more details, including | |
133 | `examples and recipes <http://pythonhosted.org//natsort/examples.html>`_. | |
253 | - automatic unicode normalization of input data | |
254 | - `controlling the case-sensitivity <http://natsort.readthedocs.io/en/master/examples.html#case-sort>`_ | |
255 | - `sorting file paths correctly <http://natsort.readthedocs.io/en/master/examples.html#path-sort>`_ | |
256 | - `allow custom sorting keys <http://natsort.readthedocs.io/en/master/examples.html#custom-sort>`_ | |
257 | ||
258 | FAQ | |
259 | --- | |
260 | ||
261 | How do I debug ``natsort.natsorted()``? | |
262 | The best way to debug ``natsorted()`` is to generate a key using ``natsort_keygen()`` | |
263 | with the same options being passed to ``natsorted``. One can take a look at | |
264 | exactly what is being done with their input using this key - it is highly recommended | |
265 | to `look at this issue describing how to debug <https://github.com/SethMMorton/natsort/issues/13#issuecomment-50422375>`_ | |
266 | for *how* to debug, and also to review the | |
267 | `How Does Natsort Work? <http://natsort.readthedocs.io/en/master/howitworks.html>`_ | |
268 | page for *why* ``natsort`` is doing that to your data. | |
269 | ||
270 | If you are trying to sort custom classes and running into trouble, please take a look at | |
271 | https://github.com/SethMMorton/natsort/issues/60. In short, | |
272 | custom classes are not likely to be sorted correctly if one relies | |
273 | on the behavior of ``__lt__`` and the other rich comparison operators in their | |
274 | custom class - it is better to use a ``key`` function with ``natsort``, or | |
275 | use the ``natsort`` key as part of your rich comparison operator definition. | |
276 | ||
277 | How *does* ``natsort`` work? | |
278 | If you don't want to read `How Does Natsort Work? <http://natsort.readthedocs.io/en/master/howitworks.html>`_, | |
279 | here is a quick primer. | |
280 | ||
281 | ``natsort`` provides a `key function <https://docs.python.org/3/howto/sorting.html#key-functions>`_ | |
282 | that can be passed to `list.sort() <https://docs.python.org/3/library/stdtypes.html#list.sort>`_ | |
283 | or `sorted() <https://docs.python.org/3/library/functions.html#sorted>`_ in order to | |
284 | modify the default sorting behavior. This key is generated on-demand with the | |
285 | key generator ``natsort.natsort_keygen()``. ``natsort.natsorted()`` is essentially | |
286 | a wrapper for the following code: | |
287 | ||
288 | .. code-block:: python | |
289 | ||
290 | >>> from natsort import natsort_keygen | |
291 | >>> natsort_key = natsort_keygen() | |
292 | >>> sorted(['1', '10', '2'], key=natsort_key) | |
293 | ['1', '2', '10'] | |
294 | ||
295 | Users can further customize ``natsort`` sorting behavior with the ``key`` | |
296 | and/or ``alg`` options (see details in the `Further Customizing Natsort`_ | |
297 | section). | |
298 | ||
299 | The key generated by ``natsort_keygen`` *always* returns a ``tuple``. It | |
300 | does so in the following way (*some details omitted for clarity*): | |
301 | ||
302 | 1. Assume the input is a string, and attempt to split it into numbers and | |
303 | non-numbers using regular expressions. Numbers are then converted into | |
304 | either ``int`` or ``float``. | |
305 | 2. If the above fails because the input is not a string, assume the input | |
306 | is some other sequence (e.g. ``list`` or ``tuple``), and recursively | |
307 | apply the key to each element of the sequence. | |
308 | 3. If the above fails because the input is not iterable, assume the input | |
309 | is an ``int`` or ``float``, and just return the input in a ``tuple``. | |
310 | ||
311 | Because a ``tuple`` is always returned, a ``TypeError`` should not be common | |
312 | unless one tries to do something odd like sort an ``int`` against a ``list``. | |
313 | ||
314 | ``natsort`` gave me results I didn't expect, and it's a terrible library! | |
315 | Did you try to debug using the above advice? If so, and you still cannot figure out | |
316 | the error, then please `file an issue <https://github.com/SethMMorton/natsort/issues/new>`_. | |
134 | 317 | |
135 | 318 | Shell script |
136 | 319 | ------------ |
141 | 324 | Requirements |
142 | 325 | ------------ |
143 | 326 | |
144 | ``natsort`` requires Python version 2.7 or greater or Python 3.2 or greater. | |
145 | ||
146 | .. _optional: | |
327 | ``natsort`` requires Python version 2.6 or greater or Python 3.3 or greater. | |
328 | It may run on (but is not tested against) Python 3.2. | |
147 | 329 | |
148 | 330 | Optional Dependencies |
149 | 331 | --------------------- |
150 | 332 | |
151 | 333 | fastnumbers |
152 | ''''''''''' | |
153 | ||
154 | The most efficient sorting can occur if you install the | |
155 | `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ package (it helps | |
156 | with the string to number conversions.) ``natsort`` will still run (efficiently) | |
157 | without the package, but if you need to squeeze out that extra juice it is | |
158 | recommended you include this as a dependency. ``natsort`` will not require (or | |
159 | check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed | |
334 | +++++++++++ | |
335 | ||
336 | The most efficient sorting can occur if you install the | |
337 | `fastnumbers <https://pypi.org/project/fastnumbers>`_ package | |
338 | (version >=0.7.1); it helps with the string to number conversions. | |
339 | ``natsort`` will still run (efficiently) without the package, but if you need | |
340 | to squeeze out that extra juice it is recommended you include this as a dependency. | |
341 | ``natsort`` will not require (or check) that | |
342 | `fastnumbers <https://pypi.org/project/fastnumbers>`_ is installed | |
160 | 343 | at installation. |
161 | 344 | |
162 | 345 | PyICU |
163 | ''''' | |
164 | ||
165 | On BSD-based systems (this includes Mac OS X), the underlying ``locale`` library | |
166 | can be buggy (please see http://bugs.python.org/issue23195); ``locale`` is | |
167 | used for the ``ns.LOCALE`` option and ``humansorted`` function.. To remedy this, | |
168 | one can | |
169 | ||
170 | 1. Use "\*.ISO8859-1" locale (i.e. 'en_US.ISO8859-1') rather than "\*.UTF-8" | |
171 | locale. These locales do not suffer from as many problems as "UTF-8" | |
172 | and thus should give expected results. | |
173 | 2. Use `PyICU <https://pypi.python.org/pypi/PyICU>`_. If | |
174 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed, ``natsort`` | |
175 | will use it under the hood; this will give more | |
176 | reliable cross-platform results in the long run. ``natsort`` will not | |
177 | require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
178 | is installed at installation. Please visit | |
179 | https://github.com/SethMMorton/natsort/issues/21 for more details and | |
180 | how to install on Mac OS X. **Please note** that using | |
181 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ is the only way to | |
182 | guarantee correct results for all input on BSD-based systems, since | |
183 | every other suggestion is a workaround. | |
184 | 3. Do nothing. As of ``natsort`` version 4.0.0, ``natsort`` is configured | |
185 | to compensate for a broken ``locale`` library in terms of case-handling; | |
186 | if you do not need to be able to properly handle non-ASCII characters | |
187 | then this may be the best option for you. | |
188 | ||
189 | Note that the above solutions *should not* be required for Windows or | |
190 | Linux since in Linux-based systems and Windows systems ``locale`` *should* work | |
191 | just fine. | |
192 | ||
193 | .. _deprecate: | |
194 | ||
195 | Moving from older Natsort versions | |
196 | ---------------------------------- | |
197 | ||
198 | - The default sorting algorithm for ``natsort`` has changed in version 4.0.0 | |
199 | from signed floats (with exponents) to unsigned integers. The motivation | |
200 | for this change is that it will cause ``natsort`` to return results that | |
201 | pass the "least astonishment" test for the most common use case, which is | |
202 | sorting version numbers. If you relied on the default behavior | |
203 | to be signed floats, add ``alg=ns.F | ns.S`` to your | |
204 | ``natsort`` calls or switch to the new ``realsorted`` function which | |
205 | behaves identically to the older ``natsorted`` with default values. | |
206 | For 99% of users this change will not effect their code... it is only | |
207 | expected that this will effect users using ``natsort`` for science and | |
208 | engineering. | |
209 | This will also affect the default behavior of the ``natsort`` shell script. | |
210 | - In ``natsort`` version 4.0.0, the ``number_type``, ``signed``, ``exp``, | |
211 | ``as_path``, and ``py3_safe`` options have be removed from the (documented) | |
212 | API in favor of the ``alg`` option and ``ns`` enum. | |
213 | - In ``natsort`` version 4.0.0, the ``natsort_key`` function has been removed | |
214 | from the public API. | |
346 | +++++ | |
347 | ||
348 | It is recommended that you install `PyICU <https://pypi.org/project/PyICU>`_ | |
349 | if you wish to sort in a locale-dependent manner, see | |
350 | http://natsort.readthedocs.io/en/master/locale_issues.html for an explanation why. | |
351 | ||
352 | Installation | |
353 | ------------ | |
354 | ||
355 | Use ``pip``! | |
356 | ||
357 | .. code-block:: sh | |
358 | ||
359 | $ pip install natsort | |
360 | ||
361 | If you want to install the `Optional Dependencies`_, you can use the | |
362 | `"extras" notation <https://packaging.python.org/tutorials/installing-packages/#installing-setuptools-extras>`_ | |
363 | at installation time to install those dependencies as well - use ``fast`` for | |
364 | `fastnumbers <https://pypi.org/project/fastnumbers>`_ and ``icu`` for | |
365 | `PyICU <https://pypi.org/project/PyICU>`_. | |
366 | ||
367 | .. code-block:: sh | |
368 | ||
369 | # Install both optional dependencies. | |
370 | $ pip install natsort[fast,icu] | |
371 | # Install just fastnumbers | |
372 | $ pip install natsort[fast] | |
373 | ||
374 | How to Run Tests | |
375 | ---------------- | |
376 | ||
377 | Please note that ``natsort`` is NOT set-up to support ``python setup.py test``. | |
378 | ||
379 | The recommended way to run tests is with `tox <https://tox.readthedocs.io/en/latest/>`_. | |
380 | After installing ``tox``, running tests is as simple as executing the following in the | |
381 | ``natsort`` directory: | |
382 | ||
383 | .. code-block:: sh | |
384 | ||
385 | $ tox | |
386 | ||
387 | ``tox`` will create a virtual environment for your tests and install all the | |
388 | needed testing requirements for you. You can specify a particular python version | |
389 | with the ``-e`` flag, e.g. ``tox -e py36``. | |
390 | ||
391 | If you do not wish to use ``tox``, you can install the testing dependencies and run the | |
392 | tests manually using `pytest <https://docs.pytest.org/en/latest/>`_ - ``natsort`` | |
393 | contains a ``Pipfile`` for use with `pipenv <https://github.com/pypa/pipenv>`_ that | |
394 | makes it easy for you to install the testing dependencies: | |
395 | ||
396 | .. code-block:: sh | |
397 | ||
398 | $ pipenv install --skip-lock --dev | |
399 | $ pipenv run python -m pytest | |
400 | ||
401 | Note that above I invoked ``python -m pytest`` instead of just ``pytest`` - this is because | |
402 | `the former puts the CWD on sys.path <https://docs.pytest.org/en/latest/usage.html#calling-pytest-through-python-m-pytest>`_. | |
215 | 403 | |
216 | 404 | Author |
217 | 405 | ------ |
221 | 409 | History |
222 | 410 | ------- |
223 | 411 | |
224 | These are the last three entries of the changelog. See the package documentation | |
225 | for the complete `changelog <http://pythonhosted.org//natsort/changelog.html>`_. | |
226 | ||
227 | 06-25-2015 v. 4.0.3 | |
228 | ''''''''''''''''''' | |
229 | ||
230 | - Fixed bad install on last release (sorry guys!). | |
231 | ||
232 | 06-24-2015 v. 4.0.2 | |
233 | ''''''''''''''''''' | |
234 | ||
235 | - Added back Python 2.6 and Python 3.2 compatibility. Unit testing is now | |
236 | performed for these versions. | |
237 | - Consolidated under-the-hood compatibility functionality. | |
238 | ||
239 | 06-04-2015 v. 4.0.1 | |
240 | ''''''''''''''''''' | |
241 | ||
242 | - Added support for sorting NaN by internally converting to -Infinity | |
243 | or +Infinity | |
412 | Please visit the `changelog <http://natsort.readthedocs.io/en/master/changelog.html>`_. |
0 | #! /bin/bash | |
1 | ||
2 | rm -rf build/ dist/ *.egg-info .pytest_cache/ .hypothesis/ .tox/ | |
3 | find . -type d -name __pycache__ -delete | |
4 | find . -type f -name "*.pyc" -delete |
0 | natsort (5.3.3-1) unstable; urgency=medium | |
1 | ||
2 | * [d/rules] | |
3 | * Remove obsolete get-orig-source target | |
4 | * Use pkg-info.mk instead of parsing changelog by hand | |
5 | * [d/control] | |
6 | * Remove obsolete X-Python-* fields | |
7 | * Update Vcs-* fields pointing to salsa | |
8 | * Bump Standards-Version to 4.2.0 (no changes needed) | |
9 | * Fix capitalization of Python on package description | |
10 | * Add (autopkg) test | |
11 | * Fix debian/watch, upstream doesn't use `v` for tags anymore | |
12 | * Update dh to 11 (no changes needed) | |
13 | * New upstream version 5.3.3 | |
14 | ||
15 | -- Agustin Henze <tin@debian.org> Mon, 20 Aug 2018 10:14:23 +0200 | |
16 | ||
0 | 17 | natsort (4.0.3-2) unstable; urgency=medium |
1 | 18 | |
2 | 19 | * Fix code example in package description (Closes: #778767) |
0 | skip-fail-test |
0 | --- a/test_natsort/test_input_string_transform_factory.py | |
1 | +++ b/test_natsort/test_input_string_transform_factory.py | |
2 | @@ -121,6 +121,7 @@ | |
3 | locale.setlocale(locale.LC_ALL, str('')) | |
4 | ||
5 | ||
6 | +@pytest.mark.skip("It's always failing, reported to upstream https://github.com/SethMMorton/natsort/issues/65") | |
7 | def test_input_string_transform_factory_removes_thousands_separator_and_is_float_aware_with_LOCALE_and_FLOAT_example(): | |
8 | x = '12,543,642,642.534,534,980' | |
9 | assert _input_string_transform_factory(ns.LOCALE | ns.FLOAT)(x) == '12543642642.534,534980' |
13 | 13 | Section: Programming/Python |
14 | 14 | |
15 | 15 | Format: HTML |
16 | Index: /usr/share/doc/python-natsort-doc/html/index.html | |
17 | Files: /usr/share/doc/python-natsort-doc/html/* | |
16 | Index: /usr/share/doc/python-natsort/html/index.html | |
17 | Files: /usr/share/doc/python-natsort/html/* |
0 | 0 | Tests: unittests |
1 | 1 | Depends: @, |
2 | python3-setuptools, | |
3 | python-setuptools, | |
2 | tox, | |
4 | 3 | locales-all, |
5 | ca-certificates, | |
4 | build-essential, | |
5 | python2-dev, | |
6 | python3-dev, |
0 | .. default-domain:: py | |
1 | .. currentmodule:: natsort | |
2 | ||
3 | .. _function_help: | |
4 | ||
5 | Help With Creating Function Keys | |
6 | ================================ | |
7 | ||
8 | If you need to create a complicated *key* argument to (for example) | |
9 | :func:`natsorted` that is actually multiple functions called one after the other, | |
10 | the following function can help you easily perform this action. It is | |
11 | used internally to :mod:`natsort`, and has been exposed publicly for | |
12 | the convenience of the user. | |
13 | ||
14 | .. autofunction:: chain_functions | |
15 |
2 | 2 | Changelog |
3 | 3 | --------- |
4 | 4 | |
5 | 07-07-2018 v. 5.3.3 | |
6 | +++++++++++++++++++ | |
7 | ||
8 | - Update docs with a FAQ and quick how-it-works. | |
9 | - Fix a StopIteration error in the testing code. | |
10 | - Enable Python 3.7 support in Travis-CI. | |
11 | ||
12 | 05-17-2018 v. 5.3.2 | |
13 | +++++++++++++++++++ | |
14 | ||
15 | - Fix bug that prevented install on old versions of setuptools. | |
16 | - Revert layout from src/natsort/ back to natsort/ to make user | |
17 | testing simpler. | |
18 | ||
19 | 05-14-2018 v. 5.3.1 | |
20 | +++++++++++++++++++ | |
21 | ||
22 | - No bugfixes or features, just infrastructure and installation updates. | |
23 | - Move to defining dependencies with Pipfile. | |
24 | - Development layout is now src/natsort/ instead of natsort/. | |
25 | - Add bumpversion infrastructure. | |
26 | - Extras can be installed by "[]" notation. | |
27 | ||
28 | 04-20-2018 v. 5.3.0 | |
29 | +++++++++++++++++++ | |
30 | ||
31 | - Fix bug in assessing ``fastnumbers`` version at import-time. | |
32 | - Add ability to consider unicode-decimal numbers as numbers. | |
33 | ||
34 | 02-14-2018 v. 5.2.0 | |
35 | +++++++++++++++++++ | |
36 | ||
37 | - Add ``ns.NUMAFTER`` to cause numbers to be placed after non-numbers. | |
38 | - Add ``natcmp`` function (Python 2 only). | |
39 | ||
40 | 11-11-2017 v. 5.1.1 | |
41 | +++++++++++++++++++ | |
42 | ||
43 | - Added additional unicode number support for Python 3.7. | |
44 | - Added information on how to install and test. | |
45 | ||
46 | 08-19-2017 v. 5.1.0 | |
47 | +++++++++++++++++++ | |
48 | ||
49 | - Fixed ``StopIteration`` warning on Python 3.6+. | |
50 | - All Unicode input is now normalized. | |
51 | ||
52 | 04-30-2017 v. 5.0.3 | |
53 | +++++++++++++++++++ | |
54 | ||
55 | - Improved development infrastructure. | |
56 | - Migrated documentation to ReadTheDocs. | |
57 | ||
58 | 01-02-2017 v. 5.0.2 | |
59 | +++++++++++++++++++ | |
60 | ||
61 | - Added additional unicode number support for Python 3.6. | |
62 | - Renamed several internal functions and variables to improve clarity. | |
63 | - Improved documentation examples. | |
64 | - Added a "how does it work?" section to the documentation. | |
65 | ||
66 | 06-04-2016 v. 5.0.1 | |
67 | +++++++++++++++++++ | |
68 | ||
69 | - The ``ns`` enum attributes can now be imported from the top-level | |
70 | namespace. | |
71 | - Fixed a bug with the ``from natsort import *`` mechanism. | |
72 | - Fixed bug with using ``natsort`` with ``python -OO``. | |
73 | ||
74 | 05-08-2016 v. 5.0.0 | |
75 | +++++++++++++++++++ | |
76 | ||
77 | - ``ns.LOCALE``/``humansorted`` now accounts for thousands separators. | |
78 | - Refactored entire codebase to be more functional (as in use functions as | |
79 | units). Previously, the code was rather monolithic and difficult to follow. The | |
80 | goal is that with the code existing in smaller units, contributing will | |
81 | be easier. | |
82 | - Deprecated ``ns.TYPESAFE`` option as it is now always on (due to a new | |
83 | iterator-based algorithm, the typesafe function is now cheap). | |
84 | - Increased speed of execution (came for free with the new functional approach | |
85 | because the new factory function paradigm eliminates most ``if`` branches | |
86 | during execution). | |
87 | ||
88 | - For most cases, the code is 30-40% faster than version 4.0.4. | |
89 | - If using ``ns.LOCALE`` or ``humansorted``, the code is 1100% faster than | |
90 | version 4.0.4. | |
91 | ||
92 | - Improved clarity of documentation with regards to locale-aware sorting. | |
93 | - Added a new ``chain_functions`` function for convenience in creating | |
94 | a complex user-given ``key`` from several existing functions. | |
95 | ||
96 | 11-01-2015 v. 4.0.4 | |
97 | +++++++++++++++++++ | |
98 | ||
99 | - Improved coverage of unit tests. | |
100 | - Unit tests use new and improved hypothesis library. | |
101 | - Fixed compatibility issues with Python 3.5 | |
102 | ||
5 | 103 | 06-25-2015 v. 4.0.3 |
6 | ''''''''''''''''''' | |
104 | +++++++++++++++++++ | |
7 | 105 | |
8 | 106 | - Fixed bad install on last release (sorry guys!). |
9 | 107 | |
10 | 108 | 06-24-2015 v. 4.0.2 |
11 | ''''''''''''''''''' | |
109 | +++++++++++++++++++ | |
12 | 110 | |
13 | 111 | - Added back Python 2.6 and Python 3.2 compatibility. Unit testing is now |
14 | 112 | performed for these versions. |
15 | 113 | - Consolidated under-the-hood compatibility functionality. |
16 | 114 | |
17 | 115 | 06-04-2015 v. 4.0.1 |
18 | ''''''''''''''''''' | |
116 | +++++++++++++++++++ | |
19 | 117 | |
20 | 118 | - Added support for sorting NaN by internally converting to -Infinity |
21 | 119 | or +Infinity |
22 | 120 | |
23 | 121 | 05-17-2015 v. 4.0.0 |
24 | ''''''''''''''''''' | |
122 | +++++++++++++++++++ | |
25 | 123 | |
26 | 124 | - Made default behavior of 'natsort' search for unsigned ints, |
27 | 125 | rather than signed floats. This is a backwards-incompatible |
32 | 130 | - Greatly improved all unit tests by adding the hypothesis library. |
33 | 131 | |
34 | 132 | 04-06-2015 v. 3.5.6 |
35 | ''''''''''''''''''' | |
133 | +++++++++++++++++++ | |
36 | 134 | |
37 | 135 | - Added 'UNGROUPLETTERS' algorithm to get the case-grouping behavior of |
38 | 136 | an ordinal sort when using 'LOCALE'. |
40 | 138 | dealing with bytes types. |
41 | 139 | |
42 | 140 | 04-04-2015 v. 3.5.5 |
43 | ''''''''''''''''''' | |
141 | +++++++++++++++++++ | |
44 | 142 | |
45 | 143 | - Added 'realsorted' and 'index_realsorted' functions for |
46 | 144 | forward-compatibility with >= 4.0.0. |
47 | 145 | - Made explanation of when to use "TYPESAFE" more clear in the docs. |
48 | 146 | |
49 | 147 | 04-02-2015 v. 3.5.4 |
50 | ''''''''''''''''''' | |
148 | +++++++++++++++++++ | |
51 | 149 | |
52 | 150 | - Fixed bug where a 'TypeError' was raised if a string containing a leading |
53 | 151 | number was sorted with alpha-only strings when 'LOCALE' is used. |
54 | 152 | |
55 | 153 | 03-26-2015 v. 3.5.3 |
56 | ''''''''''''''''''' | |
154 | +++++++++++++++++++ | |
57 | 155 | |
58 | 156 | - Fixed bug where '--reverse-filter' option in shell script was not |
59 | 157 | getting checked for correctness. |
62 | 160 | - Internal improvements, including making test suite more granular. |
63 | 161 | |
64 | 162 | 01-13-2015 v. 3.5.2 |
65 | ''''''''''''''''''' | |
163 | +++++++++++++++++++ | |
66 | 164 | |
67 | 165 | - Enhancement that will convert a 'pathlib.Path' object to a 'str' if |
68 | 166 | 'ns.PATH' is enabled. |
69 | 167 | |
70 | 168 | 09-25-2014 v. 3.5.1 |
71 | ''''''''''''''''''' | |
169 | +++++++++++++++++++ | |
72 | 170 | |
73 | 171 | - Fixed bug that caused list/tuples to fail when using 'ns.LOWERCASEFIRST' |
74 | 172 | or 'ns.IGNORECASE'. |
78 | 176 | |
79 | 177 | |
80 | 178 | 09-02-2014 v. 3.5.0 |
81 | ''''''''''''''''''' | |
179 | +++++++++++++++++++ | |
82 | 180 | |
83 | 181 | - Added the 'alg' argument to the 'natsort' functions. This argument |
84 | 182 | accepts an enum that is used to indicate the options the user wishes |
96 | 194 | - Updated shell script with locale functionality. |
97 | 195 | |
98 | 196 | 08-12-2014 v. 3.4.1 |
99 | ''''''''''''''''''' | |
197 | +++++++++++++++++++ | |
100 | 198 | |
101 | 199 | - 'natsort' will now use the 'fastnumbers' module if it is installed. This |
102 | 200 | gives up to an extra 30% boost in speed over the previous performance |
105 | 203 | new example in the examples section. |
106 | 204 | |
107 | 205 | 07-19-2014 v. 3.4.0 |
108 | ''''''''''''''''''' | |
206 | +++++++++++++++++++ | |
109 | 207 | |
110 | 208 | - Fixed a bug that caused user's options to the 'natsort_key' to not be |
111 | 209 | passed on to recursive calls of 'natsort_key'. |
134 | 232 | - Entire codebase is now PyFlakes and PEP8 compliant. |
135 | 233 | |
136 | 234 | 06-28-2014 v. 3.3.0 |
137 | ''''''''''''''''''' | |
235 | +++++++++++++++++++ | |
138 | 236 | |
139 | 237 | - Added a 'versorted' method for more convenient sorting of versions. |
140 | 238 | - Updated command-line tool --number_type option with 'version' and 'ver' |
149 | 247 | - Connected natsort development to Travis-CI to help ensure quality releases. |
150 | 248 | |
151 | 249 | 06-20-2014 v. 3.2.1 |
152 | ''''''''''''''''''' | |
250 | +++++++++++++++++++ | |
153 | 251 | |
154 | 252 | - Re-"Fixed" unorderable types issue on Python 3.x - this workaround |
155 | 253 | is for when the problem occurs in the middle of the string. |
156 | 254 | |
157 | 255 | 05-07-2014 v. 3.2.0 |
158 | ''''''''''''''''''' | |
256 | +++++++++++++++++++ | |
159 | 257 | |
160 | 258 | - "Fixed" unorderable types issue on Python 3.x with a workaround that |
161 | 259 | attempts to replicate the Python 2.x behavior by putting all the numbers |
164 | 262 | to MANIFEST.in. |
165 | 263 | |
166 | 264 | 05-05-2014 v. 3.1.2 |
167 | ''''''''''''''''''' | |
265 | +++++++++++++++++++ | |
168 | 266 | |
169 | 267 | - Added setup.cfg to support universal wheels. |
170 | 268 | - Added Python 3.0 and Python 3.1 as requiring the argparse module. |
171 | 269 | |
172 | 270 | 03-01-2014 v. 3.1.1 |
173 | ''''''''''''''''''' | |
271 | +++++++++++++++++++ | |
174 | 272 | |
175 | 273 | - Added ability to sort lists of lists. |
176 | 274 | - Cleaned up import statements. |
177 | 275 | |
178 | 276 | 01-20-2014 v. 3.1.0 |
179 | ''''''''''''''''''' | |
277 | +++++++++++++++++++ | |
180 | 278 | |
181 | 279 | - Added the ``signed`` and ``exp`` options to allow finer tuning of the sorting |
182 | 280 | - Entire codebase now works for both Python 2 and Python 3 without needing to run |
196 | 294 | to filter by. |
197 | 295 | |
198 | 296 | 10-01-2013 v. 3.0.2 |
199 | ''''''''''''''''''' | |
297 | +++++++++++++++++++ | |
200 | 298 | |
201 | 299 | - Made float, int, and digit searching algorithms all share the same base function. |
202 | 300 | - Fixed some outdated comments. |
203 | 301 | - Made the ``__version__`` variable available when importing the module. |
204 | 302 | |
205 | 303 | 8-15-2013 v. 3.0.1 |
206 | '''''''''''''''''' | |
304 | ++++++++++++++++++ | |
207 | 305 | |
208 | 306 | - Added support for unicode strings. |
209 | 307 | - Removed extraneous ``string2int`` function. |
210 | 308 | - Fixed empty string removal function. |
211 | 309 | |
212 | 310 | 7-13-2013 v. 3.0.0 |
213 | '''''''''''''''''' | |
311 | ++++++++++++++++++ | |
214 | 312 | |
215 | 313 | - Added a ``number_type`` argument to the sorting functions to specify how |
216 | 314 | liberal to be when deciding what a number is. |
217 | 315 | - Reworked the documentation. |
218 | 316 | |
219 | 317 | 6-25-2013 v. 2.2.0 |
220 | '''''''''''''''''' | |
318 | ++++++++++++++++++ | |
221 | 319 | |
222 | 320 | - Added ``key`` attribute to ``natsorted`` and ``index_natsorted`` so that |
223 | 321 | it mimics the functionality of the built-in ``sorted`` |
225 | 323 | how to get similar functionality using ``natsort_key``. |
226 | 324 | |
227 | 325 | 12-5-2012 v. 2.1.0 |
228 | '''''''''''''''''' | |
326 | ++++++++++++++++++ | |
229 | 327 | |
230 | 328 | - Reorganized package. |
231 | 329 | - Now using a platform independent shell script generator (entry_points |
234 | 332 | as well. |
235 | 333 | |
236 | 334 | 11-30-2012 v. 2.0.2 |
237 | ''''''''''''''''''' | |
335 | +++++++++++++++++++ | |
238 | 336 | |
239 | 337 | - Added the use_2to3 option to setup.py. |
240 | 338 | - Added distribute_setup.py to the distribution. |
241 | 339 | - Added dependency to the argparse module (for python2.6). |
242 | 340 | |
243 | 341 | 11-21-2012 v. 2.0.1 |
244 | ''''''''''''''''''' | |
342 | +++++++++++++++++++ | |
245 | 343 | |
246 | 344 | - Reorganized directory structure. |
247 | 345 | - Added tests into the natsort.py file itself. |
248 | 346 | |
249 | 347 | 11-16-2012, v. 2.0.0 |
250 | '''''''''''''''''''' | |
348 | ++++++++++++++++++++ | |
251 | 349 | |
252 | 350 | - Updated sorting algorithm to support floats (including exponentials) and |
253 | 351 | basic version number support. |
12 | 12 | # serve to show the default. |
13 | 13 | |
14 | 14 | import os |
15 | import re | |
16 | ||
17 | def current_version(): | |
18 | # Read the _version.py file for the module version number | |
19 | VERSIONFILE = os.path.join('..', '..', 'natsort', '_version.py') | |
20 | versionsearch = re.compile(r"^__version__ = ['\"]([^'\"]*)['\"]") | |
21 | with open(VERSIONFILE, "rt") as fl: | |
22 | for line in fl: | |
23 | m = versionsearch.search(line) | |
24 | if m: | |
25 | return m.group(1) | |
26 | else: | |
27 | s = "Unable to locate version string in {0}" | |
28 | raise RuntimeError(s.format(VERSIONFILE)) | |
29 | 15 | |
30 | 16 | # If extensions (or modules to document with autodoc) are in another directory, |
31 | 17 | # add these directories to sys.path here. If the directory is relative to the |
44 | 30 | 'sphinx.ext.autodoc', |
45 | 31 | 'sphinx.ext.autosummary', |
46 | 32 | 'sphinx.ext.intersphinx', |
47 | 'numpydoc', | |
33 | 'sphinx.ext.mathjax', | |
34 | 'sphinx.ext.napoleon', | |
48 | 35 | ] |
49 | 36 | |
50 | 37 | # Add any paths that contain templates here, relative to this directory. |
68 | 55 | # built documents. |
69 | 56 | # |
70 | 57 | # The full version, including alpha/beta/rc tags. |
71 | release = current_version() | |
58 | release = '5.3.3' | |
72 | 59 | # The short X.Y version. |
73 | 60 | version = '.'.join(release.split('.')[0:2]) |
74 | 61 | |
84 | 71 | |
85 | 72 | # List of patterns, relative to source directory, that match files and |
86 | 73 | # directories to ignore when looking for source files. |
87 | exclude_patterns = ['solar/*'] | |
74 | # exclude_patterns = ['solar/*'] | |
88 | 75 | |
89 | 76 | # The reST default role (used for this markup: `text`) to use for all |
90 | 77 | # documents. |
116 | 103 | |
117 | 104 | # The theme to use for HTML and HTML Help pages. See the documentation for |
118 | 105 | # a list of builtin themes. |
119 | html_theme = 'solar' | |
106 | on_rtd = os.environ.get('READTHEDOCS') == 'True' | |
107 | if on_rtd: | |
108 | html_theme = 'default' | |
109 | else: | |
110 | import sphinx_rtd_theme | |
111 | html_theme = 'sphinx_rtd_theme' | |
112 | # html_theme = 'solar' | |
120 | 113 | |
121 | 114 | # Theme options are theme-specific and customize the look and feel of a theme |
122 | 115 | # further. For a list of options available for each theme, see the |
277 | 270 | |
278 | 271 | |
279 | 272 | # Example configuration for intersphinx: refer to the Python standard library. |
280 | intersphinx_mapping = {'http://docs.python.org/': None} | |
273 | intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} |
8 | 8 | If you want more detailed examples than given on this page, please see |
9 | 9 | https://github.com/SethMMorton/natsort/tree/master/test_natsort. |
10 | 10 | |
11 | .. contents:: | |
12 | :local: | |
13 | ||
11 | 14 | Basic Usage |
12 | 15 | ----------- |
13 | 16 | |
14 | 17 | In the most basic use case, simply import :func:`~natsorted` and use |
15 | it as you would :func:`sorted`:: | |
16 | ||
17 | >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] | |
18 | it as you would :func:`sorted`: | |
19 | ||
20 | .. code-block:: python | |
21 | ||
22 | >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] | |
18 | 23 | >>> sorted(a) |
19 | ['a1', 'a10', 'a2', 'a4', 'a9'] | |
24 | ['1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '2 ft 7 in', '7 ft 6 in'] | |
20 | 25 | >>> from natsort import natsorted, ns |
21 | 26 | >>> natsorted(a) |
22 | ['a1', 'a2', 'a4', 'a9', 'a10'] | |
27 | ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] | |
23 | 28 | |
24 | 29 | Sort Version Numbers |
25 | 30 | -------------------- |
34 | 39 | ++++++++++++++++++++++++++++++++++++++++++++++++ |
35 | 40 | |
36 | 41 | By default, if you wish to sort versions with a non-strict versioning |
37 | scheme, you may not get the results you expect:: | |
42 | scheme, you may not get the results you expect: | |
43 | ||
44 | .. code-block:: python | |
38 | 45 | |
39 | 46 | >>> a = ['1.2', '1.2rc1', '1.2beta2', '1.2beta1', '1.2alpha', '1.2.1', '1.1', '1.3'] |
40 | 47 | >>> natsorted(a) |
41 | 48 | ['1.1', '1.2', '1.2.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.3'] |
42 | 49 | |
43 | 50 | To make the '1.2' pre-releases come before '1.2.1', you need to use the following |
44 | recipe:: | |
51 | recipe: | |
52 | ||
53 | .. code-block:: python | |
45 | 54 | |
46 | 55 | >>> natsorted(a, key=lambda x: x.replace('.', '~')) |
47 | 56 | ['1.1', '1.2', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2.1', '1.3'] |
48 | 57 | |
49 | 58 | If you also want '1.2' after all the alpha, beta, and rc candidates, you can |
50 | modify the above recipe:: | |
59 | modify the above recipe: | |
60 | ||
61 | .. code-block:: python | |
51 | 62 | |
52 | 63 | >>> natsorted(a, key=lambda x: x.replace('.', '~')+'z') |
53 | 64 | ['1.1', '1.2alpha', '1.2beta1', '1.2beta2', '1.2rc1', '1.2', '1.2.1', '1.3'] |
55 | 66 | Please see `this issue <https://github.com/SethMMorton/natsort/issues/13>`_ to |
56 | 67 | see why this works. |
57 | 68 | |
69 | .. _path_sort: | |
70 | ||
58 | 71 | Sort OS-Generated Paths |
59 | 72 | ----------------------- |
60 | 73 | |
61 | 74 | In some cases when sorting file paths with OS-Generated names, the default |
62 | 75 | :func:`~natsorted` algorithm may not be sufficient. In cases like these, |
63 | you may need to use the ``ns.PATH`` option:: | |
76 | you may need to use the ``ns.PATH`` option: | |
77 | ||
78 | .. code-block:: python | |
64 | 79 | |
65 | 80 | >>> a = ['./folder/file (1).txt', |
66 | 81 | ... './folder/file.txt', |
74 | 89 | Locale-Aware Sorting (Human Sorting) |
75 | 90 | ------------------------------------ |
76 | 91 | |
92 | .. note:: | |
93 | Please read :ref:`locale_issues` before using ``ns.LOCALE``, :func:`humansorted`, | |
94 | or :func:`index_humansorted`. | |
95 | ||
77 | 96 | You can instruct :mod:`natsort` to use locale-aware sorting with the |
78 | 97 | ``ns.LOCALE`` option. In addition to making this understand non-ASCII |
79 | 98 | characters, it will also properly interpret non-'.' decimal separators |
80 | 99 | and also properly order case. It may be more convenient to just use |
81 | the :func:`humansorted` function:: | |
100 | the :func:`humansorted` function: | |
101 | ||
102 | .. code-block:: python | |
82 | 103 | |
83 | 104 | >>> from natsort import humansorted |
84 | 105 | >>> import locale |
92 | 113 | |
93 | 114 | You may find that if you do not explicitly set the locale your results may not |
94 | 115 | be as you expect... I have found that it depends on the system you are on. |
95 | If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see below) then | |
116 | If you use `PyICU <https://pypi.org/project/PyICU>`_ (see below) then | |
96 | 117 | you should not need to do this. |
97 | 118 | |
98 | .. _bug_note: | |
99 | ||
100 | Known Bugs When Using Locale-Aware Sorting On BSD-Based OSs (Including Mac OS X) | |
101 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | |
102 | ||
103 | If you find that ``ns.LOCALE`` (or :func:`~humansorted`) does not give | |
104 | the results you expect, before filing a bug report please try to first install | |
105 | `PyICU <https://pypi.python.org/pypi/PyICU>`_; this *especially* applies | |
106 | to users on BSD-based systems (like Mac OS X). There are some known bugs | |
107 | with the ``locale`` module from the standard library that are solved when | |
108 | using `PyICU <https://pypi.python.org/pypi/PyICU>`_; you can read about | |
109 | them here: http://bugs.python.org/issue23195. | |
110 | ||
111 | If you have problems with ``ns.LOCALE`` (or :func:`~humansorted`), | |
112 | especially on BSD-based systems, you can try the following: | |
113 | ||
114 | 1. Use "\*.ISO8859-1" locale (i.e. 'en_US.ISO8859-1') rather than "\*.UTF-8" | |
115 | locale. These locales do not suffer from as many problems as "UTF-8" | |
116 | and thus should give expected results. | |
117 | 2. Use `PyICU <https://pypi.python.org/pypi/PyICU>`_. If | |
118 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed, ``natsort`` | |
119 | will use it under the hood; this will give more | |
120 | reliable cross-platform results in the long run. ``natsort`` will not | |
121 | require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
122 | is installed at installation. Please visit | |
123 | https://github.com/SethMMorton/natsort/issues/21 for more details and | |
124 | how to install on Mac OS X. **Please note** that using | |
125 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ is the only way to | |
126 | guarantee correct results for all input on BSD-based systems, since | |
127 | every other suggestion is a workaround. | |
128 | 3. Do nothing. As of ``natsort`` version 4.0.0, ``natsort`` is configured | |
129 | to compensate for a broken ``locale`` library in terms of case-handling; | |
130 | if you do not need to be able to properly handle non-ASCII characters | |
131 | then this may be the best option for you. | |
132 | ||
133 | Note that the above solutions *should not* be required for Windows or | |
134 | Linux since in Linux-based systems and Windows systems ``locale`` *should* work | |
135 | just fine. | |
119 | .. _case_sort: | |
136 | 120 | |
137 | 121 | Controlling Case When Sorting |
138 | 122 | ----------------------------- |
139 | 123 | |
140 | 124 | For non-numbers, by default :mod:`natsort` uses ordinal sorting (i.e. |
141 | it sorts by the character's value in the ASCII table). For example:: | |
125 | it sorts by the character's value in the ASCII table). For example: | |
126 | ||
127 | .. code-block:: python | |
142 | 128 | |
143 | 129 | >>> a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] |
144 | 130 | >>> natsorted(a) |
145 | 131 | ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn'] |
146 | 132 | |
147 | 133 | There are times when you wish to ignore the case when sorting, |
148 | you can easily do this with the ``ns.IGNORECASE`` option:: | |
134 | you can easily do this with the ``ns.IGNORECASE`` option: | |
135 | ||
136 | .. code-block:: python | |
149 | 137 | |
150 | 138 | >>> natsorted(a, alg=ns.IGNORECASE) |
151 | 139 | ['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn'] |
156 | 144 | |
157 | 145 | Upper-case letters appear first in the ASCII table, but many natural |
158 | 146 | sorting methods place lower-case first. To do this, use |
159 | ``ns.LOWERCASEFIRST``:: | |
147 | ``ns.LOWERCASEFIRST``: | |
148 | ||
149 | .. code-block:: python | |
160 | 150 | |
161 | 151 | >>> natsorted(a, alg=ns.LOWERCASEFIRST) |
162 | 152 | ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn'] |
164 | 154 | It may be undesirable to have the upper-case letters grouped together |
165 | 155 | and the lower-case letters grouped together; most would expect all |
166 | 156 | "a"s to be together regardless of case, and all "b"s, and so on. To |
167 | achieve this, use ``ns.GROUPLETTERS``:: | |
157 | achieve this, use ``ns.GROUPLETTERS``: | |
158 | ||
159 | .. code-block:: python | |
168 | 160 | |
169 | 161 | >>> natsorted(a, alg=ns.GROUPLETTERS) |
170 | 162 | ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn'] |
171 | 163 | |
172 | 164 | You might combine this with ``ns.LOWERCASEFIRST`` to get what most |
173 | would expect to be "natural" sorting:: | |
165 | would expect to be "natural" sorting: | |
166 | ||
167 | .. code-block:: python | |
174 | 168 | |
175 | 169 | >>> natsorted(a, alg=ns.G | ns.LF) |
176 | 170 | ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] |
181 | 175 | You can make :func:`~natsorted` search for any float that would be |
182 | 176 | a valid Python float literal, such as 5, 0.4, -4.78, +4.2E-34, etc. |
183 | 177 | using the ``ns.FLOAT`` key. You can disable the exponential component |
184 | of the number with ``ns.NOEXP``. :: | |
178 | of the number with ``ns.NOEXP``. | |
179 | ||
180 | .. code-block:: python | |
185 | 181 | |
186 | 182 | >>> a = ['a50', 'a51.', 'a+50.4', 'a5.034e1', 'a+50.300'] |
187 | 183 | >>> natsorted(a, alg=ns.FLOAT) |
196 | 192 | This can be easily accessed with the :func:`~realsorted` convenience |
197 | 193 | function. Please note that the behavior of the :func:`~realsorted` function |
198 | 194 | was the default behavior of :func:`~natsorted` for :mod:`natsort` |
199 | version < 4.0.0:: | |
195 | version < 4.0.0: | |
196 | ||
197 | .. code-block:: python | |
200 | 198 | |
201 | 199 | >>> natsorted(a, alg=ns.REAL) |
202 | 200 | ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] |
204 | 202 | >>> realsorted(a) |
205 | 203 | ['a50', 'a+50.300', 'a5.034e1', 'a+50.4', 'a51.'] |
206 | 204 | |
205 | .. _custom_sort: | |
206 | ||
207 | 207 | Using a Custom Sorting Key |
208 | 208 | -------------------------- |
209 | 209 | |
210 | 210 | Like the built-in ``sorted`` function, ``natsorted`` can accept a custom |
211 | sort key so that:: | |
211 | sort key so that: | |
212 | ||
213 | .. code-block:: python | |
212 | 214 | |
213 | 215 | >>> from operator import attrgetter, itemgetter |
214 | 216 | >>> a = [['a', 'num4'], ['b', 'num8'], ['c', 'num2']] |
228 | 230 | |
229 | 231 | If you need to sort a list in-place, you cannot use :func:`~natsorted`; you |
230 | 232 | need to pass a key to the :meth:`list.sort` method. The function |
231 | :func:`~natsort_keygen` is a convenient way to generate these keys for you:: | |
233 | :func:`~natsort_keygen` is a convenient way to generate these keys for you: | |
234 | ||
235 | .. code-block:: python | |
232 | 236 | |
233 | 237 | >>> from natsort import natsort_keygen |
234 | 238 | >>> a = ['a50', 'a51.', 'a50.4', 'a5.034e1', 'a50.300'] |
240 | 244 | :func:`~natsort_keygen` has the same API as :func:`~natsorted` (minus the |
241 | 245 | `reverse` option). |
242 | 246 | |
247 | Natural Sorting with ``cmp`` (Python 2 only) | |
248 | -------------------------------------------- | |
249 | ||
250 | .. note:: | |
251 | This is a Python2-only feature! The :func:`natcmp` function is not | |
252 | exposed on Python3. Because this documentation is built with | |
253 | Python3, you will not find :func:`natcmp` in the API. | |
254 | ||
255 | If you are using a legacy codebase that requires you to use :func:`cmp` instead | |
256 | of a key-function, you can use :func:`~natcmp`. | |
257 | ||
258 | .. code-block:: python | |
259 | ||
260 | >>> import sys | |
261 | >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] | |
262 | >>> if sys.version_info[0] == 2: | |
263 | ... from natsort import natcmp | |
264 | ... sorted(a, cmp=natcmp) | |
265 | ... else: | |
266 | ... natsorted(a) # so docstrings don't fail | |
267 | ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] | |
268 | ||
269 | :func:`natcmp` also accepts an ``alg`` argument so you can customize your | |
270 | sorting experience. | |
271 | ||
243 | 272 | Sorting Multiple Lists According to a Single List |
244 | 273 | ------------------------------------------------- |
245 | 274 | |
247 | 276 | lists and reorder the other lists according to how the first was sorted. |
248 | 277 | To achieve this you could use the :func:`~index_natsorted` in combination |
249 | 278 | with the convenience function |
250 | :func:`~order_by_index`:: | |
279 | :func:`~order_by_index`: | |
280 | ||
281 | .. code-block:: python | |
251 | 282 | |
252 | 283 | >>> from natsort import index_natsorted, order_by_index |
253 | 284 | >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] |
265 | 296 | ---------------------------------- |
266 | 297 | |
267 | 298 | Just like the :func:`sorted` built-in function, you can supply the |
268 | ``reverse`` option to return the results in reverse order:: | |
299 | ``reverse`` option to return the results in reverse order: | |
300 | ||
301 | .. code-block:: python | |
269 | 302 | |
270 | 303 | >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] |
271 | 304 | >>> natsorted(a, reverse=True) |
286 | 319 | these functions know not to raise an error if the input is not a byte |
287 | 320 | array, so you can use the key on any arbitrary collection of data. |
288 | 321 | |
289 | :: | |
322 | .. code-block:: python | |
290 | 323 | |
291 | 324 | >>> from natsort import as_ascii |
292 | 325 | >>> a = [b'a', 14.0, 'b'] |
299 | 332 | so that :mod:`natsort` cannot parse them for numbers. As a result, if you |
300 | 333 | run :mod:`natsort` on a list of bytes, you will get results that are like |
301 | 334 | Python's default sorting behavior. Of course, you can use the decoding |
302 | functions to solve this:: | |
335 | functions to solve this: | |
336 | ||
337 | .. code-block:: python | |
303 | 338 | |
304 | 339 | >>> from natsort import as_utf8 |
305 | 340 | >>> a = [b'a56', b'a5', b'a6', b'a40'] |
309 | 344 | True |
310 | 345 | |
311 | 346 | If you need a codec different from ASCII or UTF-8, you can use |
312 | :func:`decoder` to generate a custom key:: | |
347 | :func:`decoder` to generate a custom key: | |
348 | ||
349 | .. code-block:: python | |
313 | 350 | |
314 | 351 | >>> from natsort import decoder |
315 | 352 | >>> a = [b'a56', b'a5', b'a6', b'a40'] |
0 | .. default-domain:: py | |
1 | .. currentmodule:: natsort | |
2 | ||
3 | .. _howitworks: | |
4 | ||
5 | How Does Natsort Work? | |
6 | ====================== | |
7 | ||
8 | .. contents:: | |
9 | :local: | |
10 | ||
11 | :mod:`natsort` works by breaking strings into smaller sub-components (numbers | |
12 | or everything else), and returning these components in a tuple. Sorting | |
13 | tuples in Python is well-defined, and this fact is used to sort the input | |
14 | strings properly. But how does one break a string into sub-components? | |
15 | And what does one do to those components once they are split? Below I | |
16 | will explain the algorithm that was chosen for the :mod:`natsort` module, | |
17 | and some of the thinking that went into those design decisions. I will | |
18 | also mention some of the stumbling blocks I ran into because | |
19 | `getting sorting right is surprisingly hard`_. | |
20 | ||
21 | If you are impatient, you can skip to :ref:`tldr1` for the algorithm | |
22 | in the simplest case, and :ref:`tldr2` | |
23 | to see what extra code is needed to handle special cases. | |
24 | ||
25 | First, How Does Natural Sorting Work At a High Level? | |
26 | ----------------------------------------------------- | |
27 | ||
28 | If I want to compare '2 ft 7 in' to '2 ft 11 in', I might do the following | |
29 | ||
30 | .. code-block:: python | |
31 | ||
32 | >>> '2 ft 7 in' < '2 ft 11 in' | |
33 | False | |
34 | ||
35 | We as humans know that the above should be true, but why does Python think it | |
36 | is false? Here is how it is performing the comparison:: | |
37 | ||
38 | '2' <=> '2' ==> equal, so keep going | |
39 | ' ' <=> ' ' ==> equal, so keep going | |
40 | 'f' <=> 'f' ==> equal, so keep going | |
41 | 't' <=> 't' ==> equal, so keep going | |
42 | ' ' <=> ' ' ==> equal, so keep going | |
43 | '7' <=> '1' ==> different, use result of '7' < '1' | |
44 | ||
45 | '7' evaluates as greater than '1' so the statement is false. When sorting, if | |
46 | a value is less than another it is placed first, so in our above example | |
47 | '2 ft 11 in' would end up before '2 ft 7 in', which is not correct. What to do? | |
48 | ||
49 | The best way to handle this is to break the string into sub-components | |
50 | of numbers and non-numbers, and then convert the numeric parts into | |
51 | :func:`float` or :func:`int` types. This will force Python to | |
52 | actually understand the context of what it is sorting and then "do the | |
53 | right thing." Luckily, it handles sorting lists of strings right out-of-the-box, | |
54 | so the only hard part is actually making this string-to-list transformation | |
55 | and then Python will handle the rest. | |
56 | ||
57 | :: | |
58 | ||
59 | '2 ft 7 in' ==> (2, ' ft ', 7, ' in') | |
60 | '2 ft 11 in' ==> (2, ' ft ', 11, ' in') | |
61 | ||
62 | When Python compares the two, it roughly follows the below logic:: | |
63 | ||
64 | 2 <=> 2 ==> equal, so keep going | |
65 | ' ft ' <=> ' ft ' ==> a string is a special type of sequence - evaluate each character individually | |
66 | || | |
67 | --> | |
68 | ' ' <=> ' ' ==> equal, so keep going | |
69 | 'f' <=> 'f' ==> equal, so keep going | |
70 | 't' <=> 't' ==> equal, so keep going | |
71 | ' ' <=> ' ' ==> equal, so keep going | |
72 | <== Back to parent sequence | |
73 | 7 <=> 11 ==> different, use the result of 7 < 11 | |
74 | ||
75 | Clearly, seven is less than eleven, so our comparison is as we expect, and we | |
76 | would get the sorting order we wanted. | |
77 | ||
78 | At its heart, :mod:`natsort` is simply a tool to break strings into tuples, | |
79 | turning numbers in strings (i.e. ``'79'``) into *ints* and *floats* as it does this. | |
80 | ||
81 | Natsort's Approach | |
82 | ------------------ | |
83 | ||
84 | .. contents:: | |
85 | :local: | |
86 | ||
87 | Decomposing Strings Into Sub-Components | |
88 | +++++++++++++++++++++++++++++++++++++++ | |
89 | ||
90 | The first major hurdle to overcome is to decompose the string into sub-components. | |
91 | Remarkably, this turns out to be the easy part, owing mostly to Python's easy access | |
92 | to regular expressions. Breaking an arbitrary string based on a pattern is pretty | |
93 | straightforward. | |
94 | ||
95 | .. code-block:: python | |
96 | ||
97 | >>> import re | |
98 | >>> re.split(r'(\d+)', '2 ft 11 in') | |
99 | ['', '2', ' ft ', '11', ' in'] | |
100 | ||
101 | Clear (assuming you can read regular expressions) and concise. | |
102 | ||
103 | The reason I began developing :mod:`natsort` in the first place was because I | |
104 | needed to handle the natural sorting of strings containing *real numbers*, not just | |
105 | unsigned integers as the above example contains. By real numbers, I mean those like | |
106 | ``-45.4920E-23``. :mod:`natsort` can handle just about any number definition; | |
107 | to that end, here are all the regular expressions used in :mod:`natsort`: | |
108 | ||
109 | .. code-block:: python | |
110 | ||
111 | >>> unsigned_int = r'([0-9]+)' | |
112 | >>> signed_int = r'([-+]?[0-9]+)' | |
113 | >>> unsigned_float = r'((?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][-+]?[0-9]+)?)' | |
114 | >>> signed_float = r'([-+]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][-+]?[0-9]+)?)' | |
115 | >>> unsigned_float_no_exponent = r'((?:[0-9]+\.?[0-9]*|\.[0-9]+))' | |
116 | >>> signed_float_no_exponent = r'([-+]?(?:[0-9]+\.?[0-9]*|\.[0-9]+))' | |
117 | ||
118 | Note that ``"inf"`` and ``"nan"`` are deliberately omitted from the float definition because you | |
119 | wouldn't want (for example) ``"banana"`` to be converted into ``['ba', 'nan', 'a']``. | |
120 | Let's see an example: | |
121 | ||
122 | .. code-block:: python | |
123 | ||
124 | >>> re.split(signed_float, 'The mass of 3 electrons is 2.732815068E-30 kg') | |
125 | ['The mass of ', '3', ' electrons is ', '2.732815068E-30', ' kg'] | |
126 | ||
127 | .. note:: | |
128 | ||
129 | It is a bit of a lie to say the above are the complete regular expressions. In the | |
130 | actual code there is also handling for non-ASCII unicode characters (such as ⑦), | |
131 | but I will ignore that aspect of :mod:`natsort` in this discussion. | |
132 | ||
133 | Now, when the user wants to change the definition of a number, it is as easy as changing | |
134 | the pattern supplied to the regular expression engine. | |
135 | ||
136 | Choosing the right default is hard, though (well, in this case it shouldn't have been | |
137 | but I was rather thick-headed). | |
138 | In retrospect, it should have been obvious that since essentially all the code examples | |
139 | I had/have seen for natural sorting were for *unsigned integers*, I should have made the default | |
140 | definition of a number an *unsigned integer*. But, in the brash days of my youth I assumed | |
141 | that since my use case was real numbers, everyone else would be happier sorting by real numbers; | |
142 | so, I made the default definition of a number a *signed float with exponent*. | |
143 | `This astonished`_ `a lot`_ `of people`_ | |
144 | (`and some people aren't very nice when they are astonished`_). | |
145 | Starting with :mod:`natsort` version 4.0.0 the default number definition was | |
146 | changed to an *unsigned integer* which satisfies the "least astonishment" principle, and | |
147 | I have not heard a complaint since. | |
148 | ||
149 | .. admonition:: Wouldn't itertools.groupby work as well as regex to split strings? | |
150 | ||
151 | You *could* do it using something like :func:`itertools.groupby`, but it is not clearer | |
152 | nor more concise, *I promise*. | |
153 | ||
154 | .. code-block:: python | |
155 | ||
156 | >>> import itertools | |
157 | >>> import operator | |
158 | >>> list(map(''.join, map(operator.itemgetter(1), itertools.groupby('2 ft 11 in', str.isdigit)))) | |
159 | ['2', ' ft ', '11', ' in'] | |
160 | ||
161 | OK, but let's assume for a moment that you *really* like itertools and think the above | |
162 | is fine. We still have lost a lot of flexibility here because of the :meth:`str.isdigit` | |
163 | call which makes this method non-optimal; with a regular expression one can change | |
164 | the pattern string and split on much more complicated patterns, but with | |
165 | :func:`itertools.groupby` it becomes *much* more complicated to change it up; | |
166 | I implemented this strategy `as part of my testing`_ and it is anything but clear and concise. | |
167 | ||
168 | Not to mention it's *way* slower than regex. Just the simple example above (unsigned integers) | |
169 | is 50% slower than regex... | |
170 | ||
171 | Coercing Strings Containing Numbers Into Numbers | |
172 | ++++++++++++++++++++++++++++++++++++++++++++++++ | |
173 | ||
174 | There has been some debate on Stack Overflow as to what method is best to | |
175 | coerce a string to a number if it can be coerced, and leaving it alone otherwise | |
176 | (see `this one for coercion`_ and `this one for checking`_ for some high traffic questions), | |
177 | but it mostly boils down to two different solutions, shown here: | |
178 | ||
179 | .. code-block:: python | |
180 | ||
181 | >>> def coerce_try_except(x): | |
182 | ... try: | |
183 | ... return int(x) | |
184 | ... except ValueError: | |
185 | ... return x | |
186 | ... | |
187 | >>> def coerce_regex(x): | |
188 | ... # Note that precompiling the regex is more performant, | |
189 | ... # but I do not show that here for clarity's sake. | |
190 | ... return int(x) if re.match(r'[-+]?\d+$', x) else x | |
191 | ... | |
192 | ||
193 | Here are some timing results run on my machine: | |
194 | ||
195 | :: | |
196 | ||
197 | In [0]: numbers = list(map(str, range(100))) # A list of numbers as strings | |
198 | ||
199 | In [1]: not_numbers = ['banana' + x for x in numbers] | |
200 | ||
201 | In [2]: %timeit [coerce_try_except(x) for x in numbers] | |
202 | 10000 loops, best of 3: 51.1 µs per loop | |
203 | ||
204 | In [3]: %timeit [coerce_try_except(x) for x in not_numbers] | |
205 | 1000 loops, best of 3: 289 µs per loop | |
206 | ||
207 | In [4]: %timeit [coerce_regex(x) for x in not_numbers] | |
208 | 10000 loops, best of 3: 67.6 µs per loop | |
209 | ||
210 | In [5]: %timeit [coerce_regex(x) for x in numbers] | |
211 | 10000 loops, best of 3: 123 µs per loop | |
212 | ||
213 | What can we learn from this? The ``try: except`` method (arguably the most "pythonic" | |
214 | of the solutions) is best for numeric input, but performs over 5X slower for non-numeric | |
215 | input. Conversely, the regular expression method, though slower than the best case of | |
216 | ``try: except`` for both input types, is more efficient for non-numeric input than for | |
217 | input that can be converted to an ``int``. Further, even though the regular expression | |
218 | method never matches that best case, it is always at least twice as fast as the worst | |
219 | case for the ``try: except``. | |
220 | ||
221 | Why do I care? Shouldn't I just pick a method and not worry about it? Probably. However, | |
222 | I am very conscious about the performance of :mod:`natsort`, and want it to be a true | |
223 | drop-in replacement for :func:`sorted` without having to incur a performance penalty. | |
224 | For the purposes of :mod:`natsort`, there is no clear winner between the two algorithms - | |
225 | the data being passed to this function will likely be a mix of numeric and non-numeric | |
226 | string content. Do I use the ``try: except`` method and hope the speed gains on | |
227 | numbers will offset the non-number performance, or do I use regular expressions and | |
228 | take the more stable performance? | |
229 | ||
230 | It turns out that within the context of :mod:`natsort`, some assumptions can be | |
231 | made that make a hybrid approach attractive. Because all strings are pre-split | |
232 | into numeric and non-numeric content *before* being passed to this coercion function, | |
233 | the assumption can be made that *if a string begins with a digit or a sign, it | |
234 | can be coerced into a number*. | |
235 | ||
236 | .. code-block:: python | |
237 | ||
238 | >>> def coerce_to_int(x): | |
239 | ... if x[0] in '0123456789+-': | |
240 | ... try: | |
241 | ... return int(x) | |
242 | ... except ValueError: | |
243 | ... return x | |
244 | ... else: | |
245 | ... return x | |
246 | ... | |
247 | ||
248 | So how does this perform compared to the standard coercion methods? | |
249 | ||
250 | :: | |
251 | ||
252 | In [6]: %timeit [coerce_to_int(x) for x in numbers] | |
253 | 10000 loops, best of 3: 71.6 µs per loop | |
254 | ||
255 | In [7]: %timeit [coerce_to_int(x) for x in not_numbers] | |
256 | 10000 loops, best of 3: 26.4 µs per loop | |
257 | ||
258 | The hybrid method eliminates most of the time wasted on numbers checking that it | |
259 | is in fact a number before passing to :func:`int`, and eliminates the time wasted | |
260 | in the exception stack for input that is not a number. | |
261 | ||
262 | That's as fast as we can get, right? In pure Python, probably. At least, it's | |
263 | close. But because I am crazy and a glutton for punishment, I decided to see | |
264 | if I could get any faster writing a C extension. It's called | |
265 | `fastnumbers`_ and contains a C implementation of the above coercion functions | |
266 | called :func:`fast_int`. How does it fare? Pretty well. | |
267 | ||
268 | :: | |
269 | ||
270 | In [8]: %timeit [fast_int(x) for x in numbers] | |
271 | 10000 loops, best of 3: 30.9 µs per loop | |
272 | ||
273 | In [9]: %timeit [fast_int(x) for x in not_numbers] | |
274 | 10000 loops, best of 3: 30 µs per loop | |
275 | ||
276 | During development of :mod:`natsort`, I wanted to ensure that using it did not | |
277 | get in the way of a user's program by introducing a performance penalty to their code. | |
278 | To that end, I do not feel like my adventures down the rabbit hole of optimization | |
279 | of coercion functions were a waste; I can confidently look users in the eye and | |
280 | say I considered every option in ensuring :mod:`natsort` is as efficient as possible. | |
281 | This is why if `fastnumbers`_ is installed it will be used for this step, | |
282 | and otherwise the hybrid method will be used. | |
283 | ||
284 | .. note:: | |
285 | ||
286 | Modifying the hybrid coercion function for floats is straightforward. | |
287 | ||
288 | .. code-block:: python | |
289 | ||
290 | >>> def coerce_to_float(x): | |
291 | ... if x[0] in '.0123456789+-' or x.lower().lstrip()[:3] in ('nan', 'inf'): | |
292 | ... try: | |
293 | ... return float(x) | |
294 | ... except ValueError: | |
295 | ... return x | |
296 | ... else: | |
297 | ... return x | |
298 | ... | |
299 | ||
300 | .. _tldr1: | |
301 | ||
302 | TL;DR 1 - The Simple "No Special Cases" Algorithm | |
303 | +++++++++++++++++++++++++++++++++++++++++++++++++ | |
304 | ||
305 | At this point, our :mod:`natsort` algorithm is essentially the following: | |
306 | ||
307 | .. code-block:: python | |
308 | ||
309 | >>> import re | |
310 | >>> def natsort_key(x, as_float=False, signed=False): | |
311 | ... if as_float: | |
312 | ... regex = signed_float if signed else unsigned_float | |
313 | ... else: | |
314 | ... regex = signed_int if signed else unsigned_int | |
315 | ... split_input = re.split(regex, x) | |
316 | ... split_input = filter(None, split_input) # removes null strings | |
317 | ... coerce = coerce_to_float if as_float else coerce_to_int | |
318 | ... return tuple(coerce(s) for s in split_input) | |
319 | ... | |
320 | ||
321 | I have written the above for clarity and not performance. | |
322 | This pretty much matches `most natural sort solutions for python on Stack Overflow`_ | |
323 | (except the above includes customization of the definition of a number). | |
324 | ||
325 | Special Cases Everywhere! | |
326 | ------------------------- | |
327 | ||
328 | .. contents:: | |
329 | :local: | |
330 | ||
331 | .. image:: special_cases_everywhere.jpg | |
332 | ||
333 | If what I described in :ref:`TL;DR 1 <tldr1>` were | |
334 | all that :mod:`natsort` needed to | |
335 | do then there probably wouldn't be much need for a third-party module, right? | |
336 | Probably. But it turns out that in real-world data there are a lot of | |
337 | special cases that need to be handled, and in true `80%/20%`_ fashion, the | |
338 | majority of the code in :mod:`natsort` is devoted to handling special cases | |
339 | like those described below. | |
340 | ||
341 | Sorting Filesystem Paths | |
342 | ++++++++++++++++++++++++ | |
343 | ||
344 | `The first major special case I encountered was sorting filesystem paths`_ | |
345 | (if you go to the link, you will see I didn't handle it well for a year... | |
346 | this was before I fully realized how much functionality I could really add | |
347 | to :mod:`natsort`). Let's apply the :func:`natsort_key` from above to some | |
348 | filesystem paths that you might see being auto-generated from your operating | |
349 | system: | |
350 | ||
351 | .. code-block:: python | |
352 | ||
353 | >>> paths = ['/p/Folder (10)/file.tar.gz', | |
354 | ... '/p/Folder/file.tar.gz', | |
355 | ... '/p/Folder (1)/file (1).tar.gz', | |
356 | ... '/p/Folder (1)/file.tar.gz'] | |
357 | >>> sorted(paths, key=natsort_key) | |
358 | ['/p/Folder (1)/file (1).tar.gz', '/p/Folder (1)/file.tar.gz', '/p/Folder (10)/file.tar.gz', '/p/Folder/file.tar.gz'] | |
359 | ||
360 | Well that's not right! What is ``'/p/Folder/file.tar.gz'`` doing at the end? | |
361 | It has to do with the numerical ASCII code assigned to the space and | |
362 | ``/`` characters in the `ASCII table`_. According to the `ASCII table`_, the | |
363 | space character (number 32) comes before the ``/`` character (number 47). If | |
364 | we remove the common prefix in all of the above strings (``'/p/Folder'``), we | |
365 | can see why this happens: | |
366 | ||
367 | .. code-block:: python | |
368 | ||
369 | >>> ' (1)/file.tar.gz' < '/file.tar.gz' | |
370 | True | |
371 | >>> ' ' < '/' | |
372 | True | |
373 | ||
374 | This isn't very convenient... how do we solve it? We can split the path | |
375 | across the path separators and then sort. A convenient way to do this is | |
376 | with the `Path.parts`_ attribute from :mod:`pathlib`: | |
377 | ||
378 | .. code-block:: python | |
379 | ||
380 | >>> import pathlib | |
381 | >>> sorted(paths, key=lambda x: tuple(natsort_key(s) for s in pathlib.Path(x).parts)) | |
382 | ['/p/Folder/file.tar.gz', '/p/Folder (1)/file (1).tar.gz', '/p/Folder (1)/file.tar.gz', '/p/Folder (10)/file.tar.gz'] | |
383 | ||
384 | Almost! It seems like there is some funny business going on in the final | |
385 | filename component as well. We can solve that nicely and quickly with `Path.suffixes`_ | |
386 | and `Path.stem`_. | |
387 | ||
388 | .. code-block:: python | |
389 | ||
390 | >>> def decompose_path_into_components(x): | |
391 | ... path_split = list(pathlib.Path(x).parts) | |
392 | ... # Remove the final filename component from the path. | |
393 | ... final_component = pathlib.Path(path_split.pop()) | |
394 | ... # Split off all the extensions. | |
395 | ... suffixes = final_component.suffixes | |
396 | ... stem = final_component.name.replace(''.join(suffixes), '') | |
397 | ... # Remove the '.' prefix of each extension, and make that | |
398 | ... # final component a list of the stem and each suffix. | |
399 | ... final_component = [stem] + [x[1:] for x in suffixes] | |
400 | ... # Replace the split final filename component. | |
401 | ... path_split.extend(final_component) | |
402 | ... return path_split | |
403 | ... | |
404 | >>> def natsort_key_with_path_support(x): | |
405 | ... return tuple(natsort_key(s) for s in decompose_path_into_components(x)) | |
406 | ... | |
407 | >>> sorted(paths, key=natsort_key_with_path_support) | |
408 | ['/p/Folder/file.tar.gz', '/p/Folder (1)/file.tar.gz', '/p/Folder (1)/file (1).tar.gz', '/p/Folder (10)/file.tar.gz'] | |
409 | ||
410 | This works because in addition to breaking the input by path separators, the final | |
411 | filename component is separated from its extensions as well [#f1]_. *Then*, each of these | |
412 | separated components is sent to the :mod:`natsort` algorithm, so the result is | |
413 | a tuple of tuples. Once that is done, we can see how comparisons can be done in | |
414 | the expected manner. | |
415 | ||
416 | .. code-block:: python | |
417 | ||
418 | >>> a = natsort_key_with_path_support('/p/Folder (1)/file (1).tar.gz') | |
419 | >>> a | |
420 | (('/',), ('p',), ('Folder (', 1, ')'), ('file (', 1, ')'), ('tar',), ('gz',)) | |
421 | >>> | |
422 | >>> b = natsort_key_with_path_support('/p/Folder/file.tar.gz') | |
423 | >>> b | |
424 | (('/',), ('p',), ('Folder',), ('file',), ('tar',), ('gz',)) | |
425 | >>> | |
426 | >>> a > b | |
427 | True | |
428 | ||
429 | Comparing Different Types on Python 3 | |
430 | +++++++++++++++++++++++++++++++++++++ | |
431 | ||
432 | `The second major special case I encountered was sorting of different types`_. | |
433 | If you are on Python 2 (i.e. legacy Python), this mostly doesn't matter *too* | |
434 | much since it uses an arbitrary heuristic to allow traditionally un-comparable | |
435 | types to be compared (such as comparing ``'a'`` to ``1``). However, on Python 3 | |
436 | (i.e. Python) it simply won't let you perform such nonsense, raising a | |
437 | :exc:`TypeError` instead. | |
438 | ||
439 | You can imagine that a module that breaks strings into tuples of numbers and | |
440 | strings is walking a dangerous line if it does not have special handling for | |
441 | comparing numbers and strings. My imagination was not so great at first. | |
442 | Let's take a look at all the ways this can fail with real-world data. | |
443 | ||
444 | .. code-block:: python | |
445 | ||
446 | >>> def natsort_key_with_poor_real_number_support(x): | |
447 | ... split_input = re.split(signed_float, x) | |
448 | ... split_input = filter(None, split_input) # removes null strings | |
449 | ... return tuple(coerce_to_float(s) for s in split_input) | |
450 | >>> | |
451 | >>> sorted([5, '4'], key=natsort_key_with_poor_real_number_support) | |
452 | Traceback (most recent call last): | |
453 | ... | |
454 | TypeError: ... | |
455 | >>> | |
456 | >>> sorted(['12 apples', 'apples'], key=natsort_key_with_poor_real_number_support) | |
457 | Traceback (most recent call last): | |
458 | ... | |
459 | TypeError: ... | |
460 | >>> | |
461 | >>> sorted(['version5.3.0', 'version5.3rc1'], key=natsort_key_with_poor_real_number_support) | |
462 | Traceback (most recent call last): | |
463 | ... | |
464 | TypeError: ... | |
465 | ||
466 | Let's break these down. | |
467 | ||
468 | #. The integer ``5`` is sent to ``re.split`` which expects only strings | |
469 | or bytes, which is a no-no. | |
470 | #. ``natsort_key_with_poor_real_number_support('12 apples') < natsort_key_with_poor_real_number_support('apples')`` | |
471 | is the same as ``(12.0, ' apples') < ('apples',)``, and thus a number gets | |
472 | compared to a string [#f2]_ which also is a no-no. | |
473 | #. This one scores big on the astonishment scale, especially if one accidentally | |
474 | uses signed integers or real numbers when they mean to use unsigned integers. | |
475 | ``natsort_key_with_poor_real_number_support('version5.3.0') < natsort_key_with_poor_real_number_support('version5.3rc1')`` | |
476 | is the same as ``('version', 5.3, 0.0) < ('version', 5.3, 'rc', 1.0)``, so in the | |
477 | third element a number gets compared to a string, once again the same | |
478 | old no-no. (The same would happen with ``'version5-3'`` and ``'version5-a'``, | |
479 | which would become ``('version', 5, -3)`` and ``('version', 5, '-a')``). | |
480 | ||
481 | As you might expect, the solution to the first issue is to wrap the ``re.split`` | |
482 | call in a ``try: except:`` block and handle the number specially if a | |
483 | :exc:`TypeError` is raised. The second and third cases *could* be handled | |
484 | in a "special case" manner, meaning only respond and do something different | |
485 | if these problems are detected. But a less error-prone method is to ensure | |
486 | that the data is correct-by-construction, and this can be done by ensuring | |
487 | that the returned tuples *always* start with a string, and then alternate | |
488 | in a string-number-string-number-string pattern; this can be achieved by | |
489 | adding an empty string wherever the pattern is not followed [#f3]_. This ends | |
490 | up working out pretty nicely because empty strings are always "less" than | |
491 | any non-empty string, and we typically want numbers to come before strings. | |
492 | ||
493 | Let's take a look at how this works out. | |
494 | ||
495 | .. code-block:: python | |
496 | ||
497 | >>> from natsort.utils import _sep_inserter | |
498 | >>> list(_sep_inserter(iter(['apples']), '')) | |
499 | ['apples'] | |
500 | >>> | |
501 | >>> list(_sep_inserter(iter([12, ' apples']), '')) | |
502 | ['', 12, ' apples'] | |
503 | >>> | |
504 | >>> list(_sep_inserter(iter(['version', 5, -3]), '')) | |
505 | ['version', 5, '', -3] | |
506 | >>> | |
507 | >>> from natsort import natsort_keygen, ns | |
508 | >>> natsort_key_with_good_real_number_support = natsort_keygen(alg=ns.REAL) | |
509 | >>> | |
510 | >>> sorted([5, '4'], key=natsort_key_with_good_real_number_support) | |
511 | ['4', 5] | |
512 | >>> | |
513 | >>> sorted(['12 apples', 'apples'], key=natsort_key_with_good_real_number_support) | |
514 | ['12 apples', 'apples'] | |
515 | >>> | |
516 | >>> sorted(['version5.3.0', 'version5.3rc1'], key=natsort_key_with_good_real_number_support) | |
517 | ['version5.3.0', 'version5.3rc1'] | |
518 | ||
519 | How the "good" version works will be given in `TL;DR 2 - Handling Crappy, Real-World Input`_. | |
520 | ||
521 | Handling NaN | |
522 | ++++++++++++ | |
523 | ||
524 | `A rather unexpected special case I encountered was sorting collections containing NaN`_. | |
525 | Let's see what happens when you try to sort a plain old list of numbers when there | |
526 | is a **NaN** floating around in there. | |
527 | ||
528 | .. code-block:: python | |
529 | ||
530 | >>> danger = [7, float('nan'), 22.7, 19, -14, 59.123, 4] | |
531 | >>> sorted(danger) | |
532 | [7, nan, -14, 4, 19, 22.7, 59.123] | |
533 | ||
534 | Clearly that isn't correct, and for once it isn't my fault! | |
535 | `It's hard to compare floating point numbers`_. By definition, **NaN** is unorderable | |
536 | to any other number, and is never equal to any other number, including itself. | |
537 | ||
538 | .. code-block:: python | |
539 | ||
540 | >>> nan = float('nan') | |
541 | >>> 5 > nan | |
542 | False | |
543 | >>> 5 < nan | |
544 | False | |
545 | >>> 5 == nan | |
546 | False | |
547 | >>> 5 != nan | |
548 | True | |
549 | >>> nan == nan | |
550 | False | |
551 | >>> nan != nan | |
552 | True | |
553 | ||
554 | The implication of all this for us is that if there is an **NaN** in the | |
555 | data-set we are trying to sort, the data-set will end up being sorted in | |
556 | two separate yet individually sorted sequences - the one *before* the **NaN**, | |
557 | and the one *after*. This is because the ``<`` operation that is used | |
558 | to sort always returns :const:`False` with **NaN**. | |
559 | ||
560 | Because :mod:`natsort` aims to sort sequences in a way that does not surprise | |
561 | the user, keeping this behavior is not acceptable (I don't require my users | |
562 | to know how **NaN** will behave in a sorting algorithm). The simplest way to | |
563 | satisfy the "least astonishment" principle is to substitute **NaN** with | |
564 | some other value. But what value is *least* astonishing? I chose to replace | |
565 | **NaN** with :math:`-\infty` so that these poorly behaved elements always | |
566 | end up at the front where the users will most likely be alerted to their presence. | |
567 | ||
568 | .. code-block:: python | |
569 | ||
570 | >>> def fix_nan(x): | |
571 | ... if x != x: # only true for NaN | |
572 | ... return float('-inf') | |
573 | ... else: | |
574 | ... return x | |
575 | ... | |
576 | ||
577 | Let's check out :ref:`TL;DR 2 <tldr2>` to see how this can be | |
578 | incorporated into the simple key function from :ref:`TL;DR 1 <tldr1>`. | |
579 | ||
580 | .. _tldr2: | |
581 | ||
582 | TL;DR 2 - Handling Crappy, Real-World Input | |
583 | +++++++++++++++++++++++++++++++++++++++++++ | |
584 | ||
585 | Let's see how our elegant key function from :ref:`TL;DR 1 <tldr1>` has | |
586 | become bastardized in order to support handling mixed real-world data | |
587 | and user customizations. | |
588 | ||
589 | >>> def natsort_key(x, as_float=False, signed=False, as_path=False): | |
590 | ... if as_float: | |
591 | ... regex = signed_float if signed else unsigned_float | |
592 | ... else: | |
593 | ... regex = signed_int if signed else unsigned_int | |
594 | ... try: | |
595 | ... if as_path: | |
596 | ... x = decompose_path_into_components(x) # Decomposes into list of strings | |
597 | ... # If this raises a TypeError, input is not a string. | |
598 | ... split_input = re.split(regex, x) | |
599 | ... except TypeError: | |
600 | ... try: | |
601 | ... # Does this need to be applied recursively (list-of-list)? | |
602 | ... return tuple(map(natsort_key, x)) | |
603 | ... except TypeError: | |
604 | ... # Must be a number | |
605 | ... ret = ('', fix_nan(x)) # Maintain string-number-string pattern | |
606 | ... return (ret,) if as_path else ret # as_path returns tuple-of-tuples | |
607 | ... else: | |
608 | ... split_input = filter(None, split_input) # removes null strings | |
609 | ... # Note that the coerce_to_int/coerce_to_float functions | |
610 | ... # are also modified to use the fix_nan function. | |
611 | ... if as_float: | |
612 | ... coerced_input = (coerce_to_float(s) for s in split_input) | |
613 | ... else: | |
614 | ... coerced_input = (coerce_to_int(s) for s in split_input) | |
615 | ... return tuple(_sep_inserter(coerced_input, '')) | |
616 | ... | |
617 | ||
618 | And this doesn't even show handling :class:`bytes` type! Notice that we have | |
619 | to do non-obvious things like modify the return form of numbers when ``as_path`` | |
620 | is given, just to avoid comparing strings and numbers for the case in which a user provides | |
621 | input like ``['/home/me', 42]``. | |
622 | ||
623 | Let's take it out for a spin! | |
624 | ||
625 | .. code-block:: python | |
626 | ||
627 | >>> danger = [7, float('nan'), 22.7, '19', '-14', '59.123', 4] | |
628 | >>> sorted(danger, key=lambda x: natsort_key(x, as_float=True, signed=True)) | |
629 | [nan, '-14', 4, 7, '19', 22.7, '59.123'] | |
630 | >>> | |
631 | >>> paths = ['/p/Folder (1)/file.tar.gz', | |
632 | ... '/p/Folder/file.tar.gz', | |
633 | ... 123456] | |
634 | >>> sorted(paths, key=lambda x: natsort_key(x, as_path=True)) | |
635 | [123456, '/p/Folder/file.tar.gz', '/p/Folder (1)/file.tar.gz'] | |
636 | ||
637 | Here Be Dragons: Adding Locale Support | |
638 | -------------------------------------- | |
639 | ||
640 | .. contents:: | |
641 | :local: | |
642 | ||
643 | Probably the most challenging special case I had to handle was getting | |
644 | :mod:`natsort` to handle sorting the non-numerical parts of input | |
645 | correctly, and also allowing it to sort the numerical bits in different | |
646 | locales. This was in no way what I originally set out to do with this | |
647 | library, so I was `caught a bit off guard when the request was initially made`_. | |
648 | I discovered the :mod:`locale` library, and assumed that if it's part of Python's | |
649 | StdLib there can't be too many dragons, right? | |
650 | ||
651 | .. admonition:: INCOMPLETE LIST OF DRAGONS | |
652 | ||
653 | - https://github.com/SethMMorton/natsort/issues/21 | |
654 | - https://github.com/SethMMorton/natsort/issues/22 | |
655 | - https://github.com/SethMMorton/natsort/issues/23 | |
656 | - https://github.com/SethMMorton/natsort/issues/36 | |
657 | - https://github.com/SethMMorton/natsort/issues/44 | |
658 | - https://bugs.python.org/issue2481 | |
659 | - https://bugs.python.org/issue23195 | |
660 | - https://stackoverflow.com/questions/3412933/python-not-sorting-unicode-properly-strcoll-doesnt-help | |
661 | - https://stackoverflow.com/questions/22203550/sort-dictionary-by-key-using-locale-collation | |
662 | - https://stackoverflow.com/questions/33459384/unicode-character-not-in-range-when-calling-locale-strxfrm | |
663 | - https://stackoverflow.com/questions/36431810/sort-numeric-lines-with-thousand-separators | |
664 | - https://stackoverflow.com/questions/45734562/how-can-i-get-a-reasonable-string-sorting-with-python | |
665 | ||
666 | These can be summed up as follows: | |
667 | ||
668 | #. :mod:`locale` is a thin wrapper over your operating system's *locale* | |
669 | library, so if *that* is broken (like it is on BSD and OSX) then | |
670 | :mod:`locale` is broken in Python. | |
671 | #. Because of a bug in legacy Python (i.e. Python 2), there is no uniform way to use | |
672 | the :mod:`locale` sorting functionality between legacy Python and Python 3. | |
673 | #. People have differing opinions of how capitalization should affect word order. | |
674 | #. There is no built-in way to handle locale-dependent thousands separators | |
675 | and decimal points *robustly*. | |
676 | #. Proper handling of Unicode is complicated. | |
677 | #. Proper handling of :mod:`locale` is complicated. | |
678 | ||
679 | Easily over half of the code in :mod:`natsort` is in some way dealing with some | |
680 | aspect of :mod:`locale` or basic case handling. It would have been | |
681 | impossible to get right without a `really good`_ `testing strategy`_. | |
682 | ||
683 | Don't expect any more TL;DR's... if you want to see how all this is fully | |
684 | incorporated into the :mod:`natsort` algorithm then please take a look | |
685 | `at the code`_. However, I will hint at how specific steps are taken in | |
686 | each section. | |
687 | ||
688 | Let's see how we can handle some of the dragons, one-by-one. | |
689 | ||
690 | Basic Case Control Support | |
691 | ++++++++++++++++++++++++++ | |
692 | ||
693 | Without even thinking about the mess that is adding :mod:`locale` support, | |
694 | :mod:`natsort` can introduce support for controlling how case is interpreted. | |
695 | ||
696 | First, let's take a look at how it is sorted by default (due to | |
697 | where characters lie on the `ASCII table`_). | |
698 | ||
699 | .. code-block:: python | |
700 | ||
701 | >>> a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
702 | >>> sorted(a) | |
703 | ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn'] | |
704 | ||
705 | All uppercase letters come before lowercase letters in the `ASCII table`_, | |
706 | so all capitalized words appear first. Not everyone agrees that this | |
707 | is the correct order. Some believe that the capitalized words should | |
708 | be last (``['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn']``). | |
709 | Some believe that both the lowercase and uppercase versions | |
710 | should appear together (``['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn']``). | |
711 | Some believe that both should be true ☹. Some people don't care at all [#f4]_. | |
712 | ||
713 | Solving the first case (I call it *LOWERCASEFIRST*) is actually pretty | |
714 | easy... just call the :meth:`str.swapcase` method on the input. | |
715 | ||
716 | .. code-block:: python | |
717 | ||
718 | >>> sorted(a, key=lambda x: x.swapcase()) | |
719 | ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn'] | |
720 | ||
721 | The last (I call it *IGNORECASE*) should be super easy, right? | |
722 | Simply call :meth:`str.lower` on the input. This will work but may | |
723 | not always give the correct answer on non-latin character sets. It's | |
724 | a good thing that in Python 3.3 | |
725 | :meth:`str.casefold` was introduced, which does a better job of removing | |
726 | all case information from unicode characters in | |
727 | non-latin alphabets. | |
728 | ||
729 | .. code-block:: python | |
730 | ||
731 | >>> def remove_case(x): | |
732 | ... try: | |
733 | ... return x.casefold() | |
734 | ... except AttributeError: # Legacy Python backwards compatibility | |
735 | ... return x.lower() | |
736 | ... | |
737 | >>> sorted(a, key=remove_case) | |
738 | ['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn'] | |
739 | ||
740 | The middle case (I call it *GROUPLETTERS*) is less straightforward. | |
741 | The most efficient way to handle this is to duplicate each character | |
742 | with its lowercase version and then the original character. | |
743 | ||
744 | .. code-block:: python | |
745 | ||
746 | >>> import itertools | |
747 | >>> def groupletters(x): | |
748 | ... return ''.join(itertools.chain.from_iterable((remove_case(y), y) for y in x)) | |
749 | ... | |
750 | >>> groupletters('Apple') | |
751 | 'aAppppllee' | |
752 | >>> groupletters('apple') | |
753 | 'aappppllee' | |
754 | >>> sorted(a, key=groupletters) | |
755 | ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn'] | |
756 | ||
757 | The effect of this is that both ``'Apple'`` and ``'apple'`` are | |
758 | placed adjacent to each other because their transformations both begin | |
759 | with ``'a'``, and then the second character can be used to order them | |
760 | appropriately with respect to each other. | |
761 | ||
762 | There's a problem with this, though. Within the context of :mod:`natsort` | |
763 | we are trying to correctly sort numbers and those should be left alone. | |
764 | ||
765 | .. code-block:: python | |
766 | ||
767 | >>> a = ['Apple5', 'apple', 'Apple4E10', 'Banana'] | |
768 | >>> sorted(a, key=lambda x: natsort_key(x, as_float=True)) | |
769 | ['Apple5', 'Apple4E10', 'Banana', 'apple'] | |
770 | >>> sorted(a, key=lambda x: natsort_key(groupletters(x), as_float=True)) | |
771 | ['Apple4E10', 'Apple5', 'apple', 'Banana'] | |
772 | >>> groupletters('Apple4E10') | |
773 | 'aAppppllee44eE1100' | |
774 | ||
775 | We messed up the numbers! Looks like :func:`groupletters` needs to be applied | |
776 | *after* the strings are broken into their components. I'm not going to show | |
777 | how this is done here, but basically it requires applying the function in | |
778 | the ``else:`` block of :func:`coerce_to_int`/:func:`coerce_to_float`. | |
779 | ||
780 | .. code-block:: python | |
781 | ||
782 | >>> better_groupletters = natsort_keygen(alg=ns.GROUPLETTERS | ns.REAL) | |
783 | >>> better_groupletters('Apple4E10') | |
784 | ('aAppppllee', 40000000000.0) | |
785 | >>> sorted(a, key=better_groupletters) | |
786 | ['Apple5', 'Apple4E10', 'apple', 'Banana'] | |
787 | ||
788 | Of course, applying both *LOWERCASEFIRST* and *GROUPLETTERS* is just | |
789 | a matter of turning on both functions. | |
790 | ||
791 | Basic Unicode Support | |
792 | +++++++++++++++++++++ | |
793 | ||
794 | Unicode is hard and complicated. Here's an example. | |
795 | ||
796 | .. code-block:: python | |
797 | ||
798 | >>> b = [b'\x66', b'\x65', b'\xc3\xa9', b'\x65\xcc\x81', b'\x61', b'\x7a'] | |
799 | >>> a = [x.decode('utf8') for x in b] | |
800 | >>> a # doctest: +SKIP | |
801 | ['f', 'e', 'é', 'é', 'a', 'z'] | |
802 | >>> sorted(a) # doctest: +SKIP | |
803 | ['a', 'e', 'é', 'f', 'z', 'é'] | |
804 | ||
805 | ||
806 | There is more than one way to represent the character 'é' in Unicode. | |
807 | In fact, many characters have multiple representations. This is a challenge | |
808 | because comparing the two representations would return ``False`` even though | |
809 | they *look* the same. | |
810 | ||
811 | .. code-block:: python | |
812 | ||
813 | >>> a[2] == a[3] | |
814 | False | |
815 | ||
816 | Alas, since characters are compared based on the numerical value of their | |
817 | representation, sorting Unicode often gives unexpected results (like seeing | |
818 | 'é' come both *before* and *after* 'z'). | |
819 | ||
820 | The original approach that :mod:`natsort` took with respect to non-ASCII | |
821 | Unicode characters was to say "just use | |
822 | the :mod:`locale` or :mod:`PyICU` library" and then cross its fingers | |
823 | and hope those libraries take care of it. As you will find in the following | |
824 | sections, that comes with its own baggage, and turned out to not always work anyway | |
825 | (see https://stackoverflow.com/q/45734562/1399279). A more robust approach is to | |
826 | handle the Unicode out-of-the-box without invoking a heavy-handed library | |
827 | like :mod:`locale` or :mod:`PyICU`. To do this, we must use *normalization*. | |
828 | ||
829 | To fully understand Unicode normalization, `check out some official Unicode documentation`_. | |
830 | Just kidding... that's too much text. The following StackOverflow answers do | |
831 | a good job at explaining Unicode normalization in simple terms: | |
832 | https://stackoverflow.com/a/7934397/1399279 and | |
833 | https://stackoverflow.com/a/7931547/1399279. Put simply, normalization | |
834 | ensures that Unicode characters with multiple representations are in | |
835 | some canonical and consistent representation so that (for example) comparisons | |
836 | of the characters can be performed in a sane way. The following discussion | |
837 | assumes you at least read the StackOverflow answers. | |
838 | ||
839 | Looking back at our 'é' example, we can see that the two versions were | |
840 | constructed with the byte strings ``b'\xc3\xa9'`` and ``b'\x65\xcc\x81'``. | |
841 | The former representation is actually | |
842 | `LATIN SMALL LETTER E WITH ACUTE <http://www.fileformat.info/info/unicode/char/e9/index.htm>`_ | |
843 | and is a single character in the Unicode standard. This is known as the | |
844 | *composed form* and corresponds to the 'NFC' normalization scheme. | |
845 | The latter representation is actually the letter 'e' followed by | |
846 | `COMBINING ACUTE ACCENT <http://www.fileformat.info/info/unicode/char/0301/index.htm>`_ | |
847 | and so is two characters in the Unicode standard. This is known as the | |
848 | *decomposed form* and corresponds to the 'NFD' normalization scheme. | |
849 | Since the first character in the decomposed form is actually the letter 'e', | |
850 | when compared to other ASCII characters it fits where you might expect. | |
851 | Unfortunately, all Unicode composed form characters come after the | |
852 | ASCII characters and so they always will be placed after 'z' when sorting. | |
853 | ||
854 | It seems that most Unicode data is stored and shared in the composed form | |
855 | which makes it challenging to sort. This can be solved by normalizing all | |
856 | incoming Unicode data to the decomposed form ('NFD') and *then* sorting. | |
857 | ||
858 | .. code-block:: python | |
859 | ||
860 | >>> import unicodedata | |
861 | >>> c = [unicodedata.normalize('NFD', x) for x in a] | |
862 | >>> c # doctest: +SKIP | |
863 | ['f', 'e', 'é', 'é', 'a', 'z'] | |
864 | >>> sorted(c) # doctest: +SKIP | |
865 | ['a', 'e', 'é', 'é', 'f', 'z'] | |
866 | ||
867 | Huzzah! Sane sorting without having to resort to :mod:`locale`! | |
868 | ||
869 | Using Locale to Compare Strings | |
870 | +++++++++++++++++++++++++++++++ | |
871 | ||
872 | The :mod:`locale` module is actually pretty cool, and provides lowly | |
873 | spare-time programmers like myself a way to handle the daunting task | |
874 | of proper locale-dependent support of their libraries and utilities. | |
875 | Having said that, it can be a bit of a bear to get right, | |
876 | `although they do point out in the documentation that it will be painful to use`_. | |
877 | Aside from the caveats spelled out in that link, it turns out that just | |
878 | comparing strings with :mod:`locale` in a cross-platform and | |
879 | cross-python-version manner is not as straightforward as one might hope. | |
880 | ||
881 | First, how to use :mod:`locale` to compare strings? It's actually | |
882 | pretty straightforward. Simply run the input through the :mod:`locale` | |
883 | transformation function :func:`locale.strxfrm`. | |
884 | ||
885 | .. code-block:: python | |
886 | ||
887 | >>> import locale, sys | |
888 | >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') | |
889 | 'en_US.UTF-8' | |
890 | >>> a = ['a', 'b', 'ä'] | |
891 | >>> sorted(a) | |
892 | ['a', 'b', 'ä'] | |
893 | >>> # The below fails on OSX, so don't run doctest on darwin. | |
894 | >>> is_osx = sys.platform == 'darwin' | |
895 | >>> sorted(a, key=locale.strxfrm) if not is_osx else ['a', 'ä', 'b'] | |
896 | ['a', 'ä', 'b'] | |
897 | >>> | |
898 | >>> a = ['apple', 'Banana', 'banana', 'Apple'] | |
899 | >>> sorted(a, key=locale.strxfrm) if not is_osx else ['apple', 'Apple', 'banana', 'Banana'] | |
900 | ['apple', 'Apple', 'banana', 'Banana'] | |
901 | ||
902 | It turns out that locale-aware sorting groups letters in the same | |
903 | way as turning on *GROUPLETTERS* and *LOWERCASEFIRST*. | |
904 | The trick is that you have to apply :func:`locale.strxfrm` only to non-numeric | |
905 | characters; otherwise, numbers won't be parsed properly. Therefore, it must | |
906 | be applied as part of the :func:`coerce_to_int`/:func:`coerce_to_float` | |
907 | functions in a manner similar to :func:`groupletters`. | |
908 | ||
909 | As you might have guessed, there is a small problem. | |
910 | It turns out that there is a bug in the legacy Python implementation of | |
911 | :func:`locale.strxfrm` that causes it to outright fail for :func:`unicode` | |
912 | input (https://bugs.python.org/issue2481). :func:`locale.strcoll` works, | |
913 | but is intended for use with ``cmp``, which does not exist in current Python | |
914 | implementations. Luckily, the :func:`functools.cmp_to_key` function | |
915 | makes :func:`locale.strcoll` behave like :func:`locale.strxfrm` (that is, of course, | |
916 | unless you are on Python 2.6 where :func:`functools.cmp_to_key` doesn't exist, | |
917 | in which case you simply copy-paste the implementation from Python 2.7 | |
918 | directly into your code ☹). | |
919 | ||
920 | Handling Broken Locale On OSX | |
921 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
922 | ||
923 | But what if the underlying *locale* implementation that :mod:`locale` | |
924 | relies upon is simply broken? It turns out that the *locale* library on | |
925 | OSX (and other BSD systems) is broken (and for some reason has never been | |
926 | fixed?), and so :mod:`locale` does not work as expected. | |
927 | ||
928 | How do I define "doesn't work as expected"? | |
929 | ||
930 | .. code-block:: python | |
931 | ||
932 | >>> a = ['apple', 'Banana', 'banana', 'Apple'] | |
933 | >>> sorted(a) | |
934 | ['Apple', 'Banana', 'apple', 'banana'] | |
935 | >>> | |
936 | >>> sorted(a, key=locale.strxfrm) if is_osx else sorted(a) | |
937 | ['Apple', 'Banana', 'apple', 'banana'] | |
938 | ||
939 | IT'S SORTING AS IF :func:`locale.strxfrm` WAS NEVER USED!! (and it's worse | |
940 | once non-ASCII characters get thrown into the mix.) I'm really not | |
941 | sure why this is considered OK for the OSX/BSD maintainers to not fix, | |
942 | but it's more than frustrating for poor developers who have been dragged | |
943 | into the *locale* game kicking and screaming. *<deep breath>*. | |
944 | ||
945 | So, how to deal with this situation? There are two ways to do so. | |
946 | ||
947 | #. Detect if :mod:`locale` is sorting incorrectly (i.e. ``dumb``) by seeing | |
948 | if ``'A'`` is sorted before ``'a'`` (incorrect) or not. | |
949 | ||
950 | .. code-block:: python | |
951 | ||
952 | >>> # This is genuinely the name of this function. | |
953 | >>> # See natsort.compat.locale.py | |
954 | >>> def dumb_sort(): | |
955 | ... return locale.strxfrm('A') < locale.strxfrm('a') | |
956 | ... | |
957 | ||
958 | If a ``dumb`` *locale* implementation is found, then automatically | |
959 | turn on *LOWERCASEFIRST* and *GROUPLETTERS*. | |
960 | #. Use an alternate library if installed. `ICU <http://site.icu-project.org/>`_ | |
961 | is a great and powerful library that has a pretty decent Python port | |
962 | called (you guessed it) `PyICU <https://pypi.python.org/pypi/PyICU/>`_. | |
963 | If a user has this library installed on their computer, :mod:`natsort` | |
964 | chooses to use that instead of :mod:`locale`. With a little bit of | |
965 | planning, one can write a set of wrapper functions that call | |
966 | the correct library under the hood such that the business logic never | |
967 | has to know what library is being used (see `natsort.compat.locale.py`_). | |
968 | ||
969 | Let me tell you, this little complication really makes a challenge of testing | |
970 | the code, since one must set up different environments on different operating | |
971 | systems in order to test all possible code paths. Not to mention that | |
972 | certain checks *will* fail for certain operating systems and environments | |
973 | so one must be diligent in either writing the tests not to fail, or ignoring | |
974 | those tests when on offending environments. | |
975 | ||
976 | Handling Locale-Aware Numbers | |
977 | +++++++++++++++++++++++++++++ | |
978 | ||
979 | `Thousands separator support`_ is a problem that I knew would someday be | |
980 | requested but had decided to push off until a rainy day. One day it finally | |
981 | rained, and I decided to tackle the problem. | |
982 | ||
983 | So what is the problem? Consider the number ``1,234,567`` (assuming the | |
984 | ``','`` is the thousands separator). Try to run that through :func:`int` | |
985 | and you will get a :exc:`ValueError`. To handle this properly the thousands | |
986 | separators must be removed. | |
987 | ||
988 | .. code-block:: python | |
989 | ||
990 | >>> float('1,234,567'.replace(',', '')) | |
991 | 1234567.0 | |
992 | ||
993 | What if, in our current locale, the thousands separator is ``'.'`` and | |
994 | the ``','`` is the decimal separator (like for the German locale *de_DE*)? | |
995 | ||
996 | .. code-block:: python | |
997 | ||
998 | >>> float('1.234.567'.replace('.', '').replace(',', '.')) | |
999 | 1234567.0 | |
1000 | >>> float('1.234.567,89'.replace('.', '').replace(',', '.')) | |
1001 | 1234567.89 | |
1002 | ||
1003 | This is pretty much what :func:`locale.atoi` and :func:`locale.atof` do | |
1004 | under the hood. So what's the problem? Why doesn't :mod:`natsort` just | |
1005 | use this method under its hood? | |
1006 | Well, let's take a look at what would happen if we send some possible | |
1007 | :mod:`natsort` input through the above function: | |
1008 | ||
1009 | .. code-block:: python | |
1010 | ||
1011 | >>> natsort_key('1,234 apples, please.'.replace(',', '')) | |
1012 | ('', 1234, ' apples please.') | |
1013 | >>> natsort_key('Sir, €1.234,50 please.'.replace('.', '').replace(',', '.'), as_float=True) | |
1014 | ('Sir. €', 1234.5, ' please') | |
1015 | ||
1016 | Any character matching the thousands separator was dropped, and anything | |
1017 | matching the decimal separator was changed to ``'.'``! If these characters | |
1018 | were critical to how your data was ordered, this would break :mod:`natsort`. | |
1019 | ||
1020 | The first solution one might consider would be to first decompose the | |
1021 | input into sub-components (like we did for the *GROUPLETTERS* method | |
1022 | above) and then only apply these transformations on the number components. | |
1023 | This is a chicken-and-egg problem, though, because *we cannot appropriately | |
1024 | separate out the numbers because of the thousands separators and | |
1025 | non-'.' decimal separators* (well, at least not without making multiple | |
1026 | passes over the data which I do not consider to be a valid option). | |
1027 | ||
1028 | Regular expressions to the rescue! With regular expressions, we can | |
1029 | remove the thousands separators and change the decimal separator only | |
1030 | when they are actually within a number. Once the input has been | |
1031 | pre-processed with this regular expression, all the infrastructure | |
1032 | shown previously will work. | |
1033 | ||
1034 | Beware, these regular expressions will make your eyes bleed. | |
1035 | ||
1036 | .. code-block:: python | |
1037 | ||
1038 | >>> decimal = ',' # Assume German locale, so decimal separator is ',' | |
1039 | >>> # Look-behind assertions cannot accept range modifiers, so instead of i.e. | |
1040 | >>> # (?<!\.[0-9]{1,3}) I have to repeat the look-behind for 1, 2, and 3. | |
1041 | >>> nodecimal = r'(?<!{dec}[0-9])(?<!{dec}[0-9]{{2}})(?<!{dec}[0-9]{{3}})'.format(dec=decimal) | |
1042 | >>> strip_thousands = r''' | |
1043 | ... (?<=[0-9]{{1}}) # At least 1 number | |
1044 | ... (?<![0-9]{{4}}) # No more than 3 numbers | |
1045 | ... {nodecimal} # Cannot follow decimal | |
1046 | ... {thou} # The thousands separator | |
1047 | ... (?=[0-9]{{3}} # Three numbers must follow | |
1048 | ... ([^0-9]|$) # But a non-number after that | |
1049 | ... ) | |
1050 | ... '''.format(nodecimal=nodecimal, thou='.') # Thousands separator is '.' in German locale. | |
1051 | ... | |
1052 | >>> re.sub(strip_thousands, '', 'Sir, €1.234,50 please.', flags=re.X) | |
1053 | 'Sir, €1234,50 please.' | |
1054 | >>> | |
1055 | >>> # The decimal point must be preceded by a number or followed | |
1056 | >>> # by a number. This option only needs to be performed in the | |
1057 | >>> # case when the decimal separator for the locale is not '.'. | |
1058 | >>> switch_decimal = r'(?<=[0-9]){decimal}|{decimal}(?=[0-9])' | |
1059 | >>> switch_decimal = switch_decimal.format(decimal=decimal) | |
1060 | >>> re.sub(switch_decimal, '.', 'Sir, €1234,50 please.', flags=re.X) | |
1061 | 'Sir, €1234.50 please.' | |
1062 | >>> | |
1063 | >>> natsort_key('Sir, €1234.50 please.', as_float=True) | |
1064 | ('Sir, €', 1234.5, ' please.') | |
1065 | ||
1066 | Final Thoughts | |
1067 | -------------- | |
1068 | ||
1069 | My hope is that users of :mod:`natsort` never have to think about or worry | |
1070 | about all the bookkeeping or any of the details described above, and that using | |
1071 | :mod:`natsort` seems to magically "just work". For those of you who | |
1072 | took the time to read this engineering description, I hope it has enlightened | |
1073 | you to some of the issues that can be encountered when code is released | |
1074 | into the wild and has to accept "real-world data", or to what happens | |
1075 | to developers who naïvely make bold assumptions that are counter to | |
1076 | what the rest of the world assumes. | |
1077 | ||
1078 | .. rubric:: Footnotes | |
1079 | ||
1080 | .. [#f1] | |
1081 | To anyone looking through the actual code, you will note that I don't | |
1082 | actually use :mod:`pathlib` to split the paths... I wrote my own version | |
1083 | to avoid adding an external dependency of :mod:`pathlib` on Python < 3.4. | |
1084 | .. [#f2] | |
1085 | *"But if you hadn't removed the leading empty string from re.split this | |
1086 | wouldn't have happened!!"* I can hear you saying. Well, that's true. I don't | |
1087 | have a *great* reason for having done that except that in an earlier | |
1088 | non-optimal incarnation of the algorithm I needed to do it, and it kind of | |
1089 | stuck, and it made other parts of the code easier if the assumption that | |
1090 | there were no empty strings was valid. | |
1091 | .. [#f3] | |
1092 | I'm not going to show how this is implemented in this document, | |
1093 | but if you are interested you can look at the code to | |
1094 | :func:`_sep_inserter` in `util.py`_. | |
1095 | .. [#f4] | |
1096 | Handling each of these is straightforward, but coupled with the rapidly | |
1097 | fracturing execution paths presented in :ref:`TL;DR 2 <tldr2>` one can imagine | |
1098 | this will get out of hand quickly. If you take a look at `natsort.py`_ and | |
1099 | `util.py`_ you can observe that to avoid this I take a more functional approach | |
1100 | to constructing the :mod:`natsort` algorithm as opposed to the procedural approach | |
1101 | illustrated in :ref:`TL;DR 1 <tldr1>` and :ref:`TL;DR 2 <tldr2>`. | |
1102 | ||
1103 | .. _ASCII table: http://www.asciitable.com/ | |
1104 | .. _getting sorting right is surprisingly hard: http://www.compciv.org/guides/python/fundamentals/sorting-collections-with-sorted/ | |
1105 | .. _This astonished: https://github.com/SethMMorton/natsort/issues/19 | |
1106 | .. _a lot: http://stackoverflow.com/questions/29548742/python-natsort-sort-strings-recursively | |
1107 | .. _of people: http://stackoverflow.com/questions/24045348/sort-set-of-numbers-in-the-form-xx-yy-in-python | |
1108 | .. _and some people aren't very nice when they are astonished: | |
1109 | https://github.com/xolox/python-naturalsort/blob/ed3e6b6ffaca3bdea3b76e08acbb8bd2a5fee463/README.rst#why-another-natsort-module | |
1110 | .. _fastnumbers: https://github.com/SethMMorton/fastnumbers | |
1111 | .. _as part of my testing: https://github.com/SethMMorton/natsort/blob/master/test_natsort/slow_splitters.py | |
1112 | .. _this one for coercion: http://stackoverflow.com/questions/736043/checking-if-a-string-can-be-converted-to-float-in-python | |
1113 | .. _this one for checking: http://stackoverflow.com/questions/354038/how-do-i-check-if-a-string-is-a-number-float-in-python | |
1114 | .. _most natural sort solutions for python on Stack Overflow: http://stackoverflow.com/q/4836710/1399279 | |
1115 | .. _80%/20%: https://en.wikipedia.org/wiki/Pareto_principle | |
1116 | .. _The first major special case I encountered was sorting filesystem paths: https://github.com/SethMMorton/natsort/issues/3 | |
1117 | .. _The second major special case I encountered was sorting of different types: https://github.com/SethMMorton/natsort/issues/7 | |
1118 | .. _A rather unexpected special case I encountered was sorting collections containing NaN: | |
1119 | https://github.com/SethMMorton/natsort/issues/27 | |
1120 | .. _Path.parts: https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.parts | |
1121 | .. _Path.suffixes: https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.suffixes | |
1122 | .. _Path.stem: https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.stem | |
1123 | .. _It's hard to compare floating point numbers: http://www.drdobbs.com/cpp/its-hard-to-compare-floating-point-numbe/240149806 | |
1124 | .. _caught a bit off guard when the request was initially made: https://github.com/SethMMorton/natsort/issues/14 | |
1125 | .. _at the code: https://github.com/SethMMorton/natsort/tree/master/natsort | |
1126 | .. _natsort.py: https://github.com/SethMMorton/natsort/blob/master/natsort/natsort.py | |
1127 | .. _util.py: https://github.com/SethMMorton/natsort/blob/master/natsort/util.py | |
1128 | .. _although they do point out in the documentation that it will be painful to use: | |
1129 | https://docs.python.org/3/library/locale.html#background-details-hints-tips-and-caveats | |
1130 | .. _natsort.compat.locale.py: https://github.com/SethMMorton/natsort/blob/master/natsort/compat/locale.py | |
1131 | .. _Thousands separator support: https://github.com/SethMMorton/natsort/issues/36 | |
1132 | .. _really good: https://hypothesis.readthedocs.io/en/latest/ | |
1133 | .. _testing strategy: http://doc.pytest.org/en/latest/ | |
1134 | .. _check out some official Unicode documentation: http://unicode.org/reports/tr15/ |
2 | 2 | You can adapt this file completely to your liking, but it should at least |
3 | 3 | contain the root `toctree` directive. |
4 | 4 | |
5 | natsort: Natural Sorting for Python | |
6 | =================================== | |
5 | natsort: Simple yet flexible natural sorting in Python. | |
6 | ======================================================= | |
7 | 7 | |
8 | 8 | Contents: |
9 | 9 | |
12 | 12 | :numbered: |
13 | 13 | |
14 | 14 | intro.rst |
15 | howitworks.rst | |
15 | 16 | examples.rst |
16 | 17 | api.rst |
17 | 18 | shell.rst |
3 | 3 | The :mod:`natsort` module |
4 | 4 | ========================= |
5 | 5 | |
6 | Natural sorting for python. | |
6 | Simple yet flexible natural sorting in Python. | |
7 | 7 | |
8 | 8 | - Source Code: https://github.com/SethMMorton/natsort |
9 | - Downloads: https://pypi.python.org/pypi/natsort | |
10 | - Documentation: http://pythonhosted.org/natsort/ | |
9 | - Downloads: https://pypi.org/project/natsort/ | |
10 | - Documentation: http://natsort.readthedocs.io/ | |
11 | - Optional Dependencies: | |
12 | ||
13 | - `fastnumbers <https://pypi.org/project/fastnumbers>`_ >= 2.0.0 | |
14 | - `PyICU <https://pypi.org/project/PyICU>`_ >= 1.0.0 | |
15 | ||
16 | :mod:`natsort` is a general utility for sorting lists *naturally*; the definition | |
17 | of "naturally" is not well-defined, but the most common definition is that numbers | |
18 | contained within the string should be sorted as numbers and not as you would | |
19 | other characters. If you need to present sorted output to a user, you probably | |
20 | want to sort it naturally. | |
11 | 21 | |
12 | 22 | :mod:`natsort` was initially created for sorting scientific output filenames that |
13 | contained floating point numbers in the names. There was a serious lack of | |
23 | contained signed floating point numbers in the names. There was a lack of | |
14 | 24 | algorithms out there that could perform a natural sort on `floats` but |
15 | 25 | plenty for `ints`; check out |
16 | 26 | `this StackOverflow question <http://stackoverflow.com/q/4836710/1399279>`_ |
18 | 28 | `this ActiveState forum <http://code.activestate.com/recipes/285264-natural-string-sorting/>`_, |
19 | 29 | and of course `this great article on natural sorting <http://blog.codinghorror.com/sorting-for-humans-natural-sort-order/>`_ |
20 | 30 | from CodingHorror.com for examples of what I mean. |
21 | :mod:`natsort` was created to fill in this gap. It has since grown | |
22 | and can now sort version numbers (which seems to be the | |
23 | most common use case based on user feedback) as well as some other nice features. | |
31 | :mod:`natsort` was created to fill in this gap, but has since expanded to handle | |
32 | just about any definition of a number, as well as other sorting customizations. | |
24 | 33 | |
25 | 34 | Quick Description |
26 | 35 | ----------------- |
27 | 36 | |
28 | 37 | When you try to sort a list of strings that contain numbers, the normal python |
29 | 38 | sort algorithm sorts lexicographically, so you might not get the results that you |
30 | expect:: | |
31 | ||
32 | >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] | |
39 | expect: | |
40 | ||
41 | .. code-block:: python | |
42 | ||
43 | >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] | |
33 | 44 | >>> sorted(a) |
34 | ['a1', 'a10', 'a2', 'a4', 'a9'] | |
45 | ['1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '2 ft 7 in', '7 ft 6 in'] | |
35 | 46 | |
36 | 47 | Notice that it has the order ('1', '10', '2') - this is because the list is |
37 | 48 | being sorted in lexicographical order, which sorts numbers like you would |
38 | 49 | letters (i.e. 'b', 'ba', 'c'). |
39 | 50 | |
40 | 51 | :mod:`natsort` provides a function :func:`~natsorted` that helps sort lists |
41 | "naturally", either as real numbers (i.e. signed/unsigned floats or ints), | |
42 | or as versions. Using :func:`~natsorted` is simple:: | |
52 | "naturally" ("naturally" is rather ill-defined, but in general it means | |
53 | sorting based on meaning and not computer code point). | |
54 | Using :func:`~natsorted` is simple: | |
55 | ||
56 | .. code-block:: python | |
43 | 57 | |
44 | 58 | >>> from natsort import natsorted |
45 | >>> a = ['a2', 'a9', 'a1', 'a4', 'a10'] | |
46 | >>> natsorted(a) | |
47 | ['a1', 'a2', 'a4', 'a9', 'a10'] | |
59 | >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] | |
60 | >>> natsorted(a) | |
61 | ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] | |
48 | 62 | |
49 | 63 | :func:`~natsorted` identifies numbers anywhere in a string and sorts them |
50 | naturally. | |
51 | ||
52 | Sorting versions is handled properly by default (as of :mod:`natsort` version >= 4.0.0): | |
64 | naturally. Below are some other things you can do with :mod:`natsort` | |
65 | (please see the :ref:`examples` for a quick start guide, or the :ref:`api` | |
66 | for more details). | |
67 | ||
68 | .. note:: | |
69 | ||
70 | :func:`~natsorted` is designed to be a drop-in replacement for the built-in | |
71 | :func:`sorted` function. Like :func:`sorted`, :func:`~natsorted` | |
72 | `does not sort in-place`. To sort a list and assign the output to the | |
73 | same variable, you must explicitly assign the output to a variable: | |
74 | ||
75 | .. code-block:: python | |
76 | ||
77 | >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] | |
78 | >>> natsorted(a) | |
79 | ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] | |
80 | >>> print(a) # 'a' was not sorted; "natsorted" simply returned a sorted list | |
81 | ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] | |
82 | >>> a = natsorted(a) # Now 'a' will be sorted because the sorted list was assigned to 'a' | |
83 | >>> print(a) | |
84 | ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] | |
85 | ||
86 | Please see `Generating a Reusable Sorting Key and Sorting In-Place`_ for | |
87 | an alternate way to sort in-place naturally. | |
88 | ||
89 | Examples | |
90 | -------- | |
91 | ||
92 | Sorting Versions | |
93 | ++++++++++++++++ | |
94 | ||
95 | This is handled properly by default (as of :mod:`natsort` version >= 4.0.0): | |
53 | 96 | |
54 | 97 | .. code-block:: python |
55 | 98 | |
60 | 103 | If you need to sort release candidates, please see :ref:`rc_sorting` for |
61 | 104 | a useful hack. |
62 | 105 | |
63 | You can also perform locale-aware sorting (or "human sorting"), where the | |
64 | non-numeric characters are ordered based on their meaning, not on their | |
65 | ordinal value; this can be achieved with the :func:`~humansorted` function: | |
66 | ||
67 | .. code-block:: python | |
68 | ||
69 | >>> a = ['Apple', 'Banana', 'apple', 'banana'] | |
70 | >>> natsorted(a) | |
71 | ['Apple', 'Banana', 'apple', 'banana'] | |
106 | Sorting by Real Numbers (i.e. Signed Floats) | |
107 | ++++++++++++++++++++++++++++++++++++++++++++ | |
108 | ||
109 | This is useful in scientific data analysis and was | |
110 | the default behavior of :func:`~natsorted` for :mod:`natsort` | |
111 | version < 4.0.0. Use the :func:`~realsorted` function: | |
112 | ||
113 | .. code-block:: python | |
114 | ||
115 | >>> from natsort import realsorted, ns | |
116 | >>> # Note that when interpreting as signed floats, the below numbers are | |
117 | >>> # +5.10, -3.00, +5.30, +2.00 | |
118 | >>> a = ['position5.10.data', 'position-3.data', 'position5.3.data', 'position2.data'] | |
119 | >>> natsorted(a) | |
120 | ['position2.data', 'position5.3.data', 'position5.10.data', 'position-3.data'] | |
121 | >>> natsorted(a, alg=ns.REAL) | |
122 | ['position-3.data', 'position2.data', 'position5.10.data', 'position5.3.data'] | |
123 | >>> realsorted(a) # shortcut for natsorted with alg=ns.REAL | |
124 | ['position-3.data', 'position2.data', 'position5.10.data', 'position5.3.data'] | |
125 | ||
126 | Locale-Aware Sorting (or "Human Sorting") | |
127 | +++++++++++++++++++++++++++++++++++++++++ | |
128 | ||
129 | This is where the non-numeric characters are ordered based on their meaning, | |
130 | not on their ordinal value, and a locale-dependent thousands separator and decimal | |
131 | separator is accounted for in the number. | |
132 | This can be achieved with the :func:`~humansorted` function: | |
133 | ||
134 | .. code-block:: python | |
135 | ||
136 | >>> a = ['Apple', 'apple15', 'Banana', 'apple14,689', 'banana'] | |
137 | >>> natsorted(a) | |
138 | ['Apple', 'Banana', 'apple14,689', 'apple15', 'banana'] | |
72 | 139 | >>> import locale |
73 | 140 | >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') |
74 | 141 | 'en_US.UTF-8' |
142 | >>> natsorted(a, alg=ns.LOCALE) | |
143 | ['apple15', 'apple14,689', 'Apple', 'banana', 'Banana'] | |
75 | 144 | >>> from natsort import humansorted |
76 | 145 | >>> humansorted(a) |
77 | ['apple', 'Apple', 'banana', 'Banana'] | |
146 | ['apple15', 'apple14,689', 'Apple', 'banana', 'Banana'] | |
78 | 147 | |
79 | 148 | You may find you need to explicitly set the locale to get this to work |
80 | 149 | (as shown in the example). |
81 | Please see :ref:`bug_note` and the Installation section | |
150 | Please see :ref:`locale_issues` and the Installation section | |
82 | 151 | below before using the :func:`~humansorted` function. |
83 | 152 | |
84 | You can sort signed floats (i.e. real numbers) using the :func:`~realsorted`; | |
85 | this is useful in scientific data analysis. This was the default behavior of | |
86 | :func:`~natsorted` for :mod:`natsort` version < 4.0.0: | |
87 | ||
88 | .. code-block:: python | |
89 | ||
90 | >>> from natsort import realsorted | |
91 | >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] | |
92 | >>> natsorted(a) | |
93 | ['num2', 'num5.3', 'num5.10', 'num-3'] | |
94 | >>> realsorted(a) | |
95 | ['num-3', 'num2', 'num5.10', 'num5.3'] | |
153 | Further Customizing Natsort | |
154 | +++++++++++++++++++++++++++ | |
155 | ||
156 | If you need to combine multiple algorithm modifiers (such as ``ns.REAL``, | |
157 | ``ns.LOCALE``, and ``ns.IGNORECASE``), you can combine the options using the | |
158 | bitwise OR operator (``|``). For example, | |
159 | ||
160 | .. code-block:: python | |
161 | ||
162 | >>> a = ['Apple', 'apple15', 'Banana', 'apple14,689', 'banana'] | |
163 | >>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE) | |
164 | ['Apple', 'apple15', 'apple14,689', 'Banana', 'banana'] | |
165 | >>> # The ns enum provides long and short forms for each option. | |
166 | >>> ns.LOCALE == ns.L | |
167 | True | |
168 | >>> # You can also customize the convenience functions, too. | |
169 | >>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE) == realsorted(a, alg=ns.L | ns.IC) | |
170 | True | |
171 | >>> natsorted(a, alg=ns.REAL | ns.LOCALE | ns.IGNORECASE) == humansorted(a, alg=ns.R | ns.IC) | |
172 | True | |
173 | ||
174 | All of the available customizations can be found in the documentation for | |
175 | the :class:`~natsort.ns` enum. | |
176 | ||
177 | You can also add your own custom transformation functions with the ``key`` argument. | |
178 | These can be used with ``alg`` if you wish: | |
179 | ||
180 | .. code-block:: python | |
181 | ||
182 | >>> a = ['apple2.50', '2.3apple'] | |
183 | >>> natsorted(a, key=lambda x: x.replace('apple', ''), alg=ns.REAL) | |
184 | ['2.3apple', 'apple2.50'] | |
185 | ||
186 | Sorting Mixed Types | |
187 | +++++++++++++++++++ | |
96 | 188 | |
97 | 189 | You can mix and match ``int``, ``float``, and ``str`` (or ``unicode``) types |
98 | when you sort:: | |
190 | when you sort: | |
191 | ||
192 | .. code-block:: python | |
99 | 193 | |
100 | 194 | >>> a = ['4.5', 6, 2.0, '5', 'a'] |
101 | 195 | >>> natsorted(a) |
103 | 197 | >>> # On Python 2, sorted(a) would return [2.0, 6, '4.5', '5', 'a'] |
104 | 198 | >>> # On Python 3, sorted(a) would raise an "unorderable types" TypeError |
105 | 199 | |
200 | Handling Bytes on Python 3 | |
201 | ++++++++++++++++++++++++++ | |
202 | ||
106 | 203 | :mod:`natsort` does not officially support the `bytes` type on Python 3, but |
107 | convenience functions are provided that help you decode to `str` first:: | |
204 | convenience functions are provided that help you decode to `str` first: | |
205 | ||
206 | .. code-block:: python | |
108 | 207 | |
109 | 208 | >>> from natsort import as_utf8 |
110 | 209 | >>> a = [b'a', 14.0, 'b'] |
118 | 217 | >>> natsorted(a, key=as_utf8) == [b'a5', b'a6', b'a40', b'a56'] |
119 | 218 | True |
120 | 219 | |
121 | The natsort algorithm does other fancy things like | |
220 | Generating a Reusable Sorting Key and Sorting In-Place | |
221 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++ | |
222 | ||
223 | Under the hood, :func:`~natsorted` works by generating a custom sorting | |
224 | key using :func:`~natsort_keygen` and then passing that to the built-in | |
225 | :func:`sorted`. You can use the :func:`~natsort_keygen` function yourself to | |
226 | generate a custom sorting key to sort in-place using the :meth:`list.sort` | |
227 | method. | |
228 | ||
229 | .. code-block:: python | |
230 | ||
231 | >>> from natsort import natsort_keygen | |
232 | >>> natsort_key = natsort_keygen() | |
233 | >>> a = ['2 ft 7 in', '1 ft 5 in', '10 ft 2 in', '2 ft 11 in', '7 ft 6 in'] | |
234 | >>> natsorted(a) == sorted(a, key=natsort_key) | |
235 | True | |
236 | >>> a.sort(key=natsort_key) | |
237 | >>> a | |
238 | ['1 ft 5 in', '2 ft 7 in', '2 ft 11 in', '7 ft 6 in', '10 ft 2 in'] | |
239 | ||
240 | All of the algorithm customizations mentioned in the `Further Customizing Natsort`_ | |
241 | section can also be applied to :func:`~natsort_keygen` through the *alg* keyword option. | |
242 | ||
243 | Other Useful Things | |
244 | +++++++++++++++++++ | |
122 | 245 | |
123 | 246 | - recursively descend into lists of lists |
124 | - control the case-sensitivity | |
125 | - sort file paths correctly | |
126 | - allow custom sorting keys | |
127 | - exposes a natsort_key generator to pass to list.sort | |
128 | ||
129 | Please see the :ref:`examples` for a quick start guide, or the :ref:`api` | |
130 | for more details. | |
247 | - automatic unicode normalization of input data | |
248 | - controlling the case-sensitivity (see :ref:`case_sort`) | |
249 | - sorting file paths correctly (see :ref:`path_sort`) | |
250 | - allow custom sorting keys (see :ref:`custom_sort`) | |
251 | ||
252 | FAQ | |
253 | --- | |
254 | ||
255 | How do I debug :func:`~natsorted`? | |
256 | The best way to debug :func:`~natsorted` is to generate a key using :func:`~natsort_keygen` | |
257 | with the same options being passed to :func:`~natsorted`. One can take a look at | |
258 | exactly what is being done with their input using this key - it is highly recommended | |
259 | to `look at this issue describing how to debug <https://github.com/SethMMorton/natsort/issues/13#issuecomment-50422375>`_ | |
260 | for *how* to debug, and also to review the | |
261 | `How Does Natsort Work? <http://natsort.readthedocs.io/en/master/howitworks.html>`_ | |
262 | page for *why* :mod:`natsort` is doing that to your data. | |
263 | ||
264 | If you are trying to sort custom classes and running into trouble, please take a look at | |
265 | https://github.com/SethMMorton/natsort/issues/60. In short, | |
266 | custom classes are not likely to be sorted correctly if one relies | |
267 | on the behavior of ``__lt__`` and the other rich comparison operators in their | |
268 | custom class - it is better to use a ``key`` function with :mod:`natsort`, or | |
269 | use the :mod:`natsort` key as part of your rich comparison operator definition. | |
270 | ||
271 | How *does* :mod:`natsort` work? | |
272 | If you don't want to read `How Does Natsort Work? <http://natsort.readthedocs.io/en/master/howitworks.html>`_, | |
273 | here is a quick primer. | |
274 | ||
275 | :mod:`natsort` provides a `key function <https://docs.python.org/3/howto/sorting.html#key-functions>`_ | |
276 | that can be passed to `list.sort() <https://docs.python.org/3/library/stdtypes.html#list.sort>`_ | |
277 | or `sorted() <https://docs.python.org/3/library/functions.html#sorted>`_ in order to | |
278 | modify the default sorting behavior. This key is generated on-demand with the | |
279 | key generator :func:`natsort.natsort_keygen`. :func:`natsort.natsorted` is essentially | |
280 | a wrapper for the following code: | |
281 | ||
282 | .. code-block:: python | |
283 | ||
284 | >>> from natsort import natsort_keygen | |
285 | >>> natsort_key = natsort_keygen() | |
286 | >>> sorted(['1', '10', '2'], key=natsort_key) | |
287 | ['1', '2', '10'] | |
288 | ||
289 | Users can further customize :mod:`natsort` sorting behavior with the ``key`` | |
290 | and/or ``alg`` options (see details in the `Further Customizing Natsort`_ | |
291 | section). | |
292 | ||
293 | The key generated by :func:`natsort.natsort_keygen` *always* returns a :class:`tuple`. It | |
294 | does so in the following way (*some details omitted for clarity*): | |
295 | ||
296 | 1. Assume the input is a string, and attempt to split it into numbers and | |
297 | non-numbers using regular expressions. Numbers are then converted into | |
298 | either :class:`int` or :class:`float`. | |
299 | 2. If the above fails because the input is not a string, assume the input | |
300 | is some other sequence (e.g. :class:`list` or :class:`tuple`), and recursively | |
301 | apply the key to each element of the sequence. | |
302 | 3. If the above fails because the input is not iterable, assume the input | |
303 | is an :class:`int` or :class:`float`, and just return the input in a :class:`tuple`. | |
304 | ||
305 | Because a :class:`tuple` is always returned, a :exc:`TypeError` should not be common | |
306 | unless one tries to do something odd like sort an :class:`int` against a :class:`list`. | |
307 | ||
308 | :mod:`natsort` gave me results I didn't expect, and it's a terrible library! | |
309 | Did you try to debug using the above advice? If so, and you still cannot figure out | |
310 | the error, then please `file an issue <https://github.com/SethMMorton/natsort/issues/new>`_. | |
311 | ||
312 | Shell script | |
313 | ------------ | |
314 | ||
315 | :mod:`natsort` comes with a shell script called :mod:`natsort`, or can also be called | |
316 | from the command line with ``python -m natsort``. | |
317 | ||
318 | Requirements | |
319 | ------------ | |
320 | ||
321 | :mod:`natsort` requires Python version 2.6 or greater or Python 3.3 or greater. | |
322 | It may run on (but is not tested against) Python 3.2. | |
323 | ||
324 | Optional Dependencies | |
325 | --------------------- | |
326 | ||
327 | fastnumbers | |
328 | +++++++++++ | |
329 | ||
330 | The most efficient sorting can occur if you install the | |
331 | `fastnumbers <https://pypi.org/project/fastnumbers>`_ package | |
332 | (version >=0.7.1); it helps with the string to number conversions. | |
333 | :mod:`natsort` will still run (efficiently) without the package, but if you need | |
334 | to squeeze out that extra juice it is recommended you include this as a dependency. | |
335 | :mod:`natsort` will not require (or check) that | |
336 | `fastnumbers <https://pypi.org/project/fastnumbers>`_ is installed | |
337 | at installation. | |
338 | ||
339 | PyICU | |
340 | +++++ | |
341 | ||
342 | It is recommended that you install `PyICU <https://pypi.org/project/PyICU>`_ | |
343 | if you wish to sort in a locale-dependent manner, see | |
344 | http://natsort.readthedocs.io/en/master/locale_issues.html for an explanation why. | |
131 | 345 | |
132 | 346 | Installation |
133 | 347 | ------------ |
134 | 348 | |
135 | Installation of :mod:`natsort` is ultra-easy. Simply execute from the | |
136 | command line:: | |
137 | ||
138 | easy_install natsort | |
139 | ||
140 | or, if you have ``pip`` (preferred over ``easy_install``):: | |
141 | ||
142 | pip install natsort | |
143 | ||
144 | Both of the above commands will download the source for you. | |
145 | ||
146 | You can also download the source from http://pypi.python.org/pypi/natsort, | |
147 | or browse the git repository at https://github.com/SethMMorton/natsort. | |
148 | ||
149 | If you choose to install from source, you can unzip the source archive and | |
150 | enter the directory, and type:: | |
151 | ||
152 | python setup.py install | |
153 | ||
154 | If you wish to run the unit tests, enter:: | |
155 | ||
156 | python setup.py test | |
157 | ||
158 | If you want to build this documentation, enter:: | |
159 | ||
160 | python setup.py build_sphinx | |
161 | ||
162 | :mod:`natsort` requires Python version 2.7 or greater or Python 3.2 or greater. | |
163 | ||
164 | The most efficient sorting can occur if you install the | |
165 | `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ package (it helps | |
166 | with the string to number conversions.) ``natsort`` will still run (efficiently) | |
167 | without the package, but if you need to squeeze out that extra juice it is | |
168 | recommended you include this as a dependency. ``natsort`` will not require (or | |
169 | check) that `fastnumbers <https://pypi.python.org/pypi/fastnumbers>`_ is installed. | |
170 | ||
171 | On BSD-based systems (this includes Mac OS X), the underlying ``locale`` library | |
172 | can be buggy (please see http://bugs.python.org/issue23195); ``locale`` is | |
173 | used for the ``ns.LOCALE`` option and ``humansorted`` function.. To remedy this, | |
174 | one can | |
175 | ||
176 | 1. Use "\*.ISO8859-1" locale (i.e. 'en_US.ISO8859-1') rather than "\*.UTF-8" | |
177 | locale. These locales do not suffer from as many problems as "UTF-8" | |
178 | and thus should give expected results. | |
179 | 2. Use `PyICU <https://pypi.python.org/pypi/PyICU>`_. If | |
180 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ is installed, ``natsort`` | |
181 | will use it under the hood; this will give more | |
182 | reliable cross-platform results in the long run. ``natsort`` will not | |
183 | require (or check) that `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
184 | is installed at installation. Please visit | |
185 | https://github.com/SethMMorton/natsort/issues/21 for more details and | |
186 | how to install on Mac OS X. **Please note** that using | |
187 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ is the only way to | |
188 | guarantee correct results for all input on BSD-based systems, since | |
189 | every other suggestion is a workaround. | |
190 | 3. Do nothing. As of ``natsort`` version 4.0.0, ``natsort`` is configured | |
191 | to compensate for a broken ``locale`` library in terms of case-handling; | |
192 | if you do not need to be able to properly handle non-ASCII characters | |
193 | then this may be the best option for you. | |
194 | ||
195 | Note that the above solutions *should not* be required for Windows or | |
196 | Linux since in Linux-based systems and Windows systems ``locale`` *should* work | |
197 | just fine. | |
198 | ||
199 | :mod:`natsort` comes with a shell script called :mod:`natsort`, or can also be called | |
200 | from the command line with ``python -m natsort``. The command line script is | |
201 | only installed onto your ``PATH`` if you don't install via a wheel. There is | |
202 | apparently a known bug with the wheel installation process that will not create | |
203 | entry points. | |
349 | Use ``pip``! | |
350 | ||
351 | .. code-block:: sh | |
352 | ||
353 | $ pip install natsort | |
354 | ||
355 | If you want to install the `Optional Dependencies`_, you can use the | |
356 | `"extras" notation <https://packaging.python.org/tutorials/installing-packages/#installing-setuptools-extras>`_ | |
357 | at installation time to install those dependencies as well - use ``fast`` for | |
358 | `fastnumbers <https://pypi.org/project/fastnumbers>`_ and ``icu`` for | |
359 | `PyICU <https://pypi.org/project/PyICU>`_. | |
360 | ||
361 | .. code-block:: sh | |
362 | ||
363 | # Install both optional dependencies. | |
364 | $ pip install natsort[fast,icu] | |
365 | # Install just fastnumbers | |
366 | $ pip install natsort[fast] | |
367 | ||
368 | How to Run Tests | |
369 | ---------------- | |
370 | ||
371 | Please note that :mod:`natsort` is NOT set-up to support ``python setup.py test``. | |
372 | ||
373 | The recommended way to run tests is with `tox <https://tox.readthedocs.io/en/latest/>`_. | |
374 | After installing ``tox``, running tests is as simple as executing the following in the | |
375 | ``natsort`` directory: | |
376 | ||
377 | .. code-block:: sh | |
378 | ||
379 | $ tox | |
380 | ||
381 | ``tox`` will create a virtual environment for your tests and install all the | |
382 | needed testing requirements for you. You can specify a particular python version | |
383 | with the ``-e`` flag, e.g. ``tox -e py36``. | |
384 | ||
385 | If you do not wish to use ``tox``, you can install the testing dependencies and run the | |
386 | tests manually using `pytest <https://docs.pytest.org/en/latest/>`_ - ``natsort`` | |
387 | contains a ``Pipfile`` for use with `pipenv <https://github.com/pypa/pipenv>`_ that | |
388 | makes it easy for you to install the testing dependencies: | |
389 | ||
390 | .. code-block:: sh | |
391 | ||
392 | $ pipenv install --skip-lock --dev | |
393 | $ pipenv run python -m pytest | |
394 | ||
395 | Note that above I invoked ``python -m pytest`` instead of just ``pytest`` - this is because | |
396 | `the former puts the CWD on sys.path <https://docs.pytest.org/en/latest/usage.html#calling-pytest-through-python-m-pytest>`_.⏎ |
0 | .. default-domain:: py | |
1 | .. currentmodule:: natsort | |
2 | ||
3 | .. _locale_issues: | |
4 | ||
5 | Possible Issues with :func:`~natsort.humansorted` or ``ns.LOCALE`` | |
6 | ================================================================== | |
7 | ||
8 | Being Locale-Aware Means Both Numbers and Non-Numbers | |
9 | ----------------------------------------------------- | |
10 | ||
11 | In addition to modifying how characters are sorted, ``ns.LOCALE`` will take into | |
12 | account locale-dependent thousands separators (and locale-dependent decimal | |
13 | separators if ``ns.FLOAT`` is enabled). This means that if you are in a | |
14 | locale that uses commas as the thousands separator, a number like | |
15 | ``123,456`` will be interpreted as ``123456``. If this is not what you want, | |
16 | you may consider using ``ns.LOCALEALPHA`` which will only enable locale-aware | |
17 | sorting for non-numbers (similarly, ``ns.LOCALENUM`` enables locale-aware | |
18 | sorting only for numbers). | |
19 | ||
20 | Regenerate Key With :func:`~natsort.natsort_keygen` After Changing Locale | |
21 | ------------------------------------------------------------------------- | |
22 | ||
23 | When :func:`~natsort.natsort_keygen` is called it returns a key function that | |
24 | hard-codes the provided settings. This means that the key returned when | |
25 | ``ns.LOCALE`` is used contains the settings specified by the locale | |
26 | *loaded at the time the key is generated*. If you change the locale, | |
27 | you should regenerate the key to account for the new locale. | |
28 | ||
29 | Corollary: Do Not Reuse :func:`~natsort.natsort_keygen` After Changing Locale | |
30 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | |
31 | ||
32 | If you change locale, the old function will not work as expected. | |
33 | The `locale <https://docs.python.org/3.5/library/locale.html>`_ library works | |
34 | with a global state. When :func:`~natsort.natsort_keygen` is called it does the | |
35 | best job that it can to make the returned function as static as possible and | |
36 | independent of the global state, but the | |
37 | `strxfrm <https://docs.python.org/3.5/library/locale.html#locale.strxfrm>`_ | |
38 | function must access this global state to work; therefore, if you change | |
39 | locale and use ``ns.LOCALE`` then you should discard the old key. | |
40 | ||
41 | .. note:: If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ then you | |
42 | may be able to reuse keys after changing locale. | |
43 | ||
44 | The `locale <https://docs.python.org/3.5/library/locale.html>`_ Module From the StdLib Has Issues | |
45 | ------------------------------------------------------------------------------------------------- | |
46 | ||
47 | :mod:`natsort` will use `PyICU <https://pypi.org/project/PyICU>`_ for | |
48 | :func:`~natsort.humansorted` or ``ns.LOCALE`` if it is installed. If not, | |
49 | it will fall back on the `locale <https://docs.python.org/3.5/library/locale.html>`_ | |
50 | library from the Python stdlib. If you do not have | |
51 | `PyICU <https://pypi.org/project/PyICU>`_ installed, please keep the | |
52 | following known problems and issues in mind. | |
53 | ||
54 | .. note:: Remember, if you have `PyICU <https://pypi.org/project/PyICU>`_ | |
55 | installed you shouldn't need to worry about any of these. | |
56 | ||
57 | Explicitly Set the Locale Before Using :func:`~natsort.humansorted` or ``ns.LOCALE`` | |
58 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | |
59 | ||
60 | I have found that unless you explicitly set a locale, the sorted order may not | |
61 | be what you expect. Setting this is straightforward | |
62 | (in the below example I use 'en_US.UTF-8', but you should use your | |
63 | locale):: | |
64 | ||
65 | >>> import locale | |
66 | >>> locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') | |
67 | 'en_US.UTF-8' | |
68 | ||
69 | .. _bug_note: | |
70 | ||
71 | `locale <https://docs.python.org/3.5/library/locale.html>`_ Is Broken on Mac OS X | |
72 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ | |
73 | ||
74 | It's not Python's fault, but the OS... the locale library for BSD-based systems | |
75 | (of which Mac OS X is one) is broken. See the following links: | |
76 | ||
77 | - http://stackoverflow.com/questions/3412933/python-not-sorting-unicode-properly-strcoll-doesnt-help | |
78 | - http://bugs.python.org/issue23195 | |
79 | - https://github.com/SethMMorton/natsort/issues/21 (contains instructions on installing) | |
80 | - http://stackoverflow.com/questions/33459384/unicode-character-not-in-range-when-calling-locale-strxfrm | |
81 | - https://github.com/SethMMorton/natsort/issues/34 | |
82 | ||
83 | Of course, installing `PyICU <https://pypi.org/project/PyICU>`_ fixes this, | |
84 | but if you don't want to or cannot install this there is some hope. | |
85 | ||
86 | 1. As of ``natsort`` version 4.0.0, ``natsort`` is configured | |
87 | to compensate for a broken ``locale`` library. When sorting non-numbers | |
88 | it will handle case as you expect, but it will still not be able to | |
89 | comprehend non-ASCII characters properly. Additionally, it has | |
90 | a built-in lookup table of thousands separators that are incorrect | |
91 | on OS X/BSD (but it is possible it is not complete... please file an | |
92 | issue if you see it is not complete) | |
93 | 2. Use "\*.ISO8859-1" locale (i.e. 'en_US.ISO8859-1') rather than "\*.UTF-8" | |
94 | locale. I have found that these have fewer issues than "UTF-8", but | |
95 | your mileage may vary. |
0 | News | |
1 | ==== | |
2 | ||
3 | 1.3 | |
4 | --- | |
5 | * Release date: 2012-11-01. | |
6 | * Source Code Pro is now used for code samples. | |
7 | * Reduced font size of pre elements. | |
8 | * Horizontal rule for header elements. | |
9 | * HTML pre contents are now wrapped (no scrollbars). | |
10 | * Changed permalink color from black to a lighter one. | |
11 | ||
12 | 1.2 | |
13 | --- | |
14 | * Release date: 2012-10-03. | |
15 | * Style additional admonition levels. | |
16 | * Increase padding for navigation links (minor). | |
17 | * Add shadow for admonition items (minor). | |
18 | ||
19 | 1.1 | |
20 | --- | |
21 | * Release date: 2012-09-05. | |
22 | * Add a new background. | |
23 | * Revert font of headings to Open Sans Light. | |
24 | * Darker color for h3 - h6. | |
25 | * Removed dependency on solarized dark pygments style. | |
26 | * Nice looking scrollbars for pre element. | |
27 | ||
28 | 1.0 | |
29 | --- | |
30 | * Release date: 2012-08-24. | |
31 | * Initial release. |
0 | Solar theme for Python Sphinx | |
1 | ============================= | |
2 | Solar is an attempt to create a theme for Sphinx based on the `Solarized <http://ethanschoonover.com/solarized>`_ color scheme. | |
3 | ||
4 | Preview | |
5 | ------- | |
6 | http://vimalkumar.in/sphinx-themes/solar | |
7 | ||
8 | Download | |
9 | -------- | |
10 | Released versions are available from http://github.com/vkvn/sphinx-themes/downloads | |
11 | ||
12 | Installation | |
13 | ------------ | |
14 | #. Extract the archive. | |
15 | #. Modify ``conf.py`` of an existing Sphinx project or create new project using ``sphinx-quickstart``. | |
16 | #. Change the ``html_theme`` parameter to ``solar``. | |
17 | #. Change the ``html_theme_path`` to the location containing the extracted archive. | |
18 | ||
19 | License | |
20 | ------- | |
21 | `GNU General Public License <http://www.gnu.org/licenses/gpl.html>`_. | |
22 | ||
23 | Credits | |
24 | ------- | |
25 | Modified from the default Sphinx theme -- Sphinxdoc | |
26 | ||
27 | Background pattern from http://subtlepatterns.com. |
0 | {% extends "basic/layout.html" %} | |
1 | ||
2 | {%- block doctype -%} | |
3 | <!DOCTYPE html> | |
4 | {%- endblock -%} | |
5 | ||
6 | {%- block extrahead -%} | |
7 | <link href='http://fonts.googleapis.com/css?family=Source+Code+Pro|Open+Sans:300italic,400italic,700italic,400,300,700' rel='stylesheet' type='text/css'> | |
8 | <link href="{{ pathto("_static/solarized-dark.css", 1) }}" rel="stylesheet"> | |
9 | {%- endblock -%} | |
10 | ||
11 | {# put the sidebar before the body #} | |
12 | {% block sidebar1 %}{{ sidebar() }}{% endblock %} | |
13 | {% block sidebar2 %}{% endblock %} | |
14 | ||
15 | {%- block footer %} | |
16 | <div class="footer"> | |
17 | {%- if show_copyright %} | |
18 | {%- if hasdoc('copyright') %} | |
19 | {% trans path=pathto('copyright'), copyright=copyright|e %}© <a href="{{ path }}">Copyright</a> {{ copyright }}.{% endtrans %} | |
20 | {%- else %} | |
21 | {% trans copyright=copyright|e %}© Copyright {{ copyright }}.{% endtrans %} | |
22 | {%- endif %} | |
23 | {%- endif %} | |
24 | {%- if last_updated %} | |
25 | {% trans last_updated=last_updated|e %}Last updated on {{ last_updated }}.{% endtrans %} | |
26 | {%- endif %} | |
27 | {%- if show_sphinx %} | |
28 | {% trans sphinx_version=sphinx_version|e %}Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> {{ sphinx_version }}.Theme by <a href="http://github.com/vkvn">vkvn</a>{% endtrans %} | |
29 | {%- endif %} | |
30 | </div> | |
31 | {%- endblock %} |
0 | /* solar.css | |
1 | * Modified from sphinxdoc.css of the sphinxdoc theme. | |
2 | */ | |
3 | ||
4 | @import url("basic.css"); | |
5 | ||
6 | /* -- page layout ----------------------------------------------------------- */ | |
7 | ||
8 | body { | |
9 | font-family: 'Open Sans', sans-serif; | |
10 | font-size: 14px; | |
11 | line-height: 150%; | |
12 | text-align: center; | |
13 | color: #002b36; | |
14 | padding: 0; | |
15 | margin: 0px 80px 0px 80px; | |
16 | min-width: 740px; | |
17 | -moz-box-shadow: 0px 0px 10px #93a1a1; | |
18 | -webkit-box-shadow: 0px 0px 10px #93a1a1; | |
19 | box-shadow: 0px 0px 10px #93a1a1; | |
20 | background: url("subtle_dots.png") repeat; | |
21 | ||
22 | } | |
23 | ||
24 | div.document { | |
25 | background-color: #fcfcfc; | |
26 | text-align: left; | |
27 | background-repeat: repeat-x; | |
28 | } | |
29 | ||
30 | div.bodywrapper { | |
31 | margin: 0 240px 0 0; | |
32 | border-right: 1px dotted #eee8d5; | |
33 | } | |
34 | ||
35 | div.body { | |
36 | background-color: white; | |
37 | margin: 0; | |
38 | padding: 0.5em 20px 20px 20px; | |
39 | } | |
40 | ||
41 | div.related { | |
42 | font-size: 1em; | |
43 | background: #002b36; | |
44 | color: #839496; | |
45 | padding: 5px 0px; | |
46 | } | |
47 | ||
48 | div.related ul { | |
49 | height: 2em; | |
50 | margin: 2px; | |
51 | } | |
52 | ||
53 | div.related ul li { | |
54 | margin: 0; | |
55 | padding: 0; | |
56 | height: 2em; | |
57 | float: left; | |
58 | } | |
59 | ||
60 | div.related ul li.right { | |
61 | float: right; | |
62 | margin-right: 5px; | |
63 | } | |
64 | ||
65 | div.related ul li a { | |
66 | margin: 0; | |
67 | padding: 2px 5px; | |
68 | line-height: 2em; | |
69 | text-decoration: none; | |
70 | color: #839496; | |
71 | } | |
72 | ||
73 | div.related ul li a:hover { | |
74 | background-color: #073642; | |
75 | -webkit-border-radius: 2px; | |
76 | -moz-border-radius: 2px; | |
77 | border-radius: 2px; | |
78 | } | |
79 | ||
80 | div.sphinxsidebarwrapper { | |
81 | padding: 0; | |
82 | } | |
83 | ||
84 | div.sphinxsidebar { | |
85 | margin: 0; | |
86 | padding: 0.5em 15px 15px 0; | |
87 | width: 210px; | |
88 | float: right; | |
89 | font-size: 0.9em; | |
90 | text-align: left; | |
91 | } | |
92 | ||
93 | div.sphinxsidebar h3, div.sphinxsidebar h4 { | |
94 | margin: 1em 0 0.5em 0; | |
95 | font-size: 1em; | |
96 | padding: 0.7em; | |
97 | background-color: #eeeff1; | |
98 | } | |
99 | ||
100 | div.sphinxsidebar h3 a { | |
101 | color: #2E3436; | |
102 | } | |
103 | ||
104 | div.sphinxsidebar ul { | |
105 | padding-left: 1.5em; | |
106 | margin-top: 7px; | |
107 | padding: 0; | |
108 | line-height: 150%; | |
109 | color: #586e75; | |
110 | } | |
111 | ||
112 | div.sphinxsidebar ul ul { | |
113 | margin-left: 20px; | |
114 | } | |
115 | ||
116 | div.sphinxsidebar input { | |
117 | border: 1px solid #eee8d5; | |
118 | } | |
119 | ||
120 | div.footer { | |
121 | background-color: #93a1a1; | |
122 | color: #eee; | |
123 | padding: 3px 8px 3px 0; | |
124 | clear: both; | |
125 | font-size: 0.8em; | |
126 | text-align: right; | |
127 | } | |
128 | ||
129 | div.footer a { | |
130 | color: #eee; | |
131 | text-decoration: none; | |
132 | } | |
133 | ||
134 | /* -- body styles ----------------------------------------------------------- */ | |
135 | ||
136 | p { | |
137 | margin: 0.8em 0 0.5em 0; | |
138 | } | |
139 | ||
140 | div.body a, div.sphinxsidebarwrapper a { | |
141 | color: #268bd2; | |
142 | text-decoration: none; | |
143 | } | |
144 | ||
145 | div.body a:hover, div.sphinxsidebarwrapper a:hover { | |
146 | border-bottom: 1px solid #268bd2; | |
147 | } | |
148 | ||
149 | h1, h2, h3, h4, h5, h6 { | |
150 | font-family: "Open Sans", sans-serif; | |
151 | font-weight: 300; | |
152 | } | |
153 | ||
154 | h1 { | |
155 | margin: 0; | |
156 | padding: 0.7em 0 0.3em 0; | |
157 | line-height: 1.2em; | |
158 | color: #002b36; | |
159 | text-shadow: #eee 0.1em 0.1em 0.1em; | |
160 | } | |
161 | ||
162 | h2 { | |
163 | margin: 1.3em 0 0.2em 0; | |
164 | padding: 0 0 10px 0; | |
165 | color: #073642; | |
166 | border-bottom: 1px solid #eee; | |
167 | } | |
168 | ||
169 | h3 { | |
170 | margin: 1em 0 -0.3em 0; | |
171 | padding-bottom: 5px; | |
172 | } | |
173 | ||
174 | h3, h4, h5, h6 { | |
175 | color: #073642; | |
176 | border-bottom: 1px dotted #eee; | |
177 | } | |
178 | ||
179 | div.body h1 a, div.body h2 a, div.body h3 a, div.body h4 a, div.body h5 a, div.body h6 a { | |
180 | color: #657B83!important; | |
181 | } | |
182 | ||
183 | h1 a.anchor, h2 a.anchor, h3 a.anchor, h4 a.anchor, h5 a.anchor, h6 a.anchor { | |
184 | display: none; | |
185 | margin: 0 0 0 0.3em; | |
186 | padding: 0 0.2em 0 0.2em; | |
187 | color: #aaa!important; | |
188 | } | |
189 | ||
190 | h1:hover a.anchor, h2:hover a.anchor, h3:hover a.anchor, h4:hover a.anchor, | |
191 | h5:hover a.anchor, h6:hover a.anchor { | |
192 | display: inline; | |
193 | } | |
194 | ||
195 | h1 a.anchor:hover, h2 a.anchor:hover, h3 a.anchor:hover, h4 a.anchor:hover, | |
196 | h5 a.anchor:hover, h6 a.anchor:hover { | |
197 | color: #777; | |
198 | background-color: #eee; | |
199 | } | |
200 | ||
201 | a.headerlink { | |
202 | color: #c60f0f!important; | |
203 | font-size: 1em; | |
204 | margin-left: 6px; | |
205 | padding: 0 4px 0 4px; | |
206 | text-decoration: none!important; | |
207 | } | |
208 | ||
209 | a.headerlink:hover { | |
210 | background-color: #ccc; | |
211 | color: white!important; | |
212 | } | |
213 | ||
214 | ||
215 | cite, code, tt { | |
216 | font-family: 'Source Code Pro', monospace; | |
217 | font-size: 0.9em; | |
218 | letter-spacing: 0.01em; | |
219 | background-color: #eeeff2; | |
220 | font-style: normal; | |
221 | } | |
222 | ||
223 | hr { | |
224 | border: 1px solid #eee; | |
225 | margin: 2em; | |
226 | } | |
227 | ||
228 | .highlight { | |
229 | -webkit-border-radius: 2px; | |
230 | -moz-border-radius: 2px; | |
231 | border-radius: 2px; | |
232 | } | |
233 | ||
234 | pre { | |
235 | font-family: 'Source Code Pro', monospace; | |
236 | font-style: normal; | |
237 | font-size: 0.9em; | |
238 | letter-spacing: 0.015em; | |
239 | line-height: 120%; | |
240 | padding: 0.7em; | |
241 | white-space: pre-wrap; /* css-3 */ | |
242 | white-space: -moz-pre-wrap; /* Mozilla, since 1999 */ | |
243 | white-space: -pre-wrap; /* Opera 4-6 */ | |
244 | white-space: -o-pre-wrap; /* Opera 7 */ | |
245 | word-wrap: break-word; /* Internet Explorer 5.5+ */ | |
246 | } | |
247 | ||
248 | pre a { | |
249 | color: inherit; | |
250 | text-decoration: underline; | |
251 | } | |
252 | ||
253 | td.linenos pre { | |
254 | padding: 0.5em 0; | |
255 | } | |
256 | ||
257 | div.quotebar { | |
258 | background-color: #f8f8f8; | |
259 | max-width: 250px; | |
260 | float: right; | |
261 | padding: 2px 7px; | |
262 | border: 1px solid #ccc; | |
263 | } | |
264 | ||
265 | div.topic { | |
266 | background-color: #f8f8f8; | |
267 | } | |
268 | ||
269 | table { | |
270 | border-collapse: collapse; | |
271 | margin: 0 -0.5em 0 -0.5em; | |
272 | } | |
273 | ||
274 | table td, table th { | |
275 | padding: 0.2em 0.5em 0.2em 0.5em; | |
276 | } | |
277 | ||
278 | div.admonition { | |
279 | font-size: 0.9em; | |
280 | margin: 1em 0 1em 0; | |
281 | border: 1px solid #eee; | |
282 | background-color: #f7f7f7; | |
283 | padding: 0; | |
284 | -moz-box-shadow: 0px 8px 6px -8px #93a1a1; | |
285 | -webkit-box-shadow: 0px 8px 6px -8px #93a1a1; | |
286 | box-shadow: 0px 8px 6px -8px #93a1a1; | |
287 | } | |
288 | ||
289 | div.admonition p { | |
290 | margin: 0.5em 1em 0.5em 1em; | |
291 | padding: 0.2em; | |
292 | } | |
293 | ||
294 | div.admonition pre { | |
295 | margin: 0.4em 1em 0.4em 1em; | |
296 | } | |
297 | ||
298 | div.admonition p.admonition-title | |
299 | { | |
300 | margin: 0; | |
301 | padding: 0.2em 0 0.2em 0.6em; | |
302 | color: white; | |
303 | border-bottom: 1px solid #eee8d5; | |
304 | font-weight: bold; | |
305 | background-color: #268bd2; | |
306 | } | |
307 | ||
308 | div.warning p.admonition-title, | |
309 | div.important p.admonition-title { | |
310 | background-color: #cb4b16; | |
311 | } | |
312 | ||
313 | div.hint p.admonition-title, | |
314 | div.tip p.admonition-title { | |
315 | background-color: #859900; | |
316 | } | |
317 | ||
318 | div.caution p.admonition-title, | |
319 | div.attention p.admonition-title, | |
320 | div.danger p.admonition-title, | |
321 | div.error p.admonition-title { | |
322 | background-color: #dc322f; | |
323 | } | |
324 | ||
325 | div.admonition ul, div.admonition ol { | |
326 | margin: 0.1em 0.5em 0.5em 3em; | |
327 | padding: 0; | |
328 | } | |
329 | ||
330 | div.versioninfo { | |
331 | margin: 1em 0 0 0; | |
332 | border: 1px solid #eee; | |
333 | background-color: #DDEAF0; | |
334 | padding: 8px; | |
335 | line-height: 1.3em; | |
336 | font-size: 0.9em; | |
337 | } | |
338 | ||
339 | div.viewcode-block:target { | |
340 | background-color: #f4debf; | |
341 | border-top: 1px solid #eee; | |
342 | border-bottom: 1px solid #eee; | |
343 | } |
0 | /* solarized dark style for solar theme */ | |
1 | ||
2 | /*style pre scrollbar*/ | |
3 | pre::-webkit-scrollbar, .highlight::-webkit-scrollbar { | |
4 | height: 0.5em; | |
5 | background: #073642; | |
6 | } | |
7 | ||
8 | pre::-webkit-scrollbar-thumb { | |
9 | border-radius: 1em; | |
10 | background: #93a1a1; | |
11 | } | |
12 | ||
13 | /* pygments style */ | |
14 | .highlight .hll { background-color: #ffffcc } | |
15 | .highlight { background: #002B36!important; color: #93A1A1 } | |
16 | .highlight .c { color: #586E75 } /* Comment */ | |
17 | .highlight .err { color: #93A1A1 } /* Error */ | |
18 | .highlight .g { color: #93A1A1 } /* Generic */ | |
19 | .highlight .k { color: #859900 } /* Keyword */ | |
20 | .highlight .l { color: #93A1A1 } /* Literal */ | |
21 | .highlight .n { color: #93A1A1 } /* Name */ | |
22 | .highlight .o { color: #859900 } /* Operator */ | |
23 | .highlight .x { color: #CB4B16 } /* Other */ | |
24 | .highlight .p { color: #93A1A1 } /* Punctuation */ | |
25 | .highlight .cm { color: #586E75 } /* Comment.Multiline */ | |
26 | .highlight .cp { color: #859900 } /* Comment.Preproc */ | |
27 | .highlight .c1 { color: #586E75 } /* Comment.Single */ | |
28 | .highlight .cs { color: #859900 } /* Comment.Special */ | |
29 | .highlight .gd { color: #2AA198 } /* Generic.Deleted */ | |
30 | .highlight .ge { color: #93A1A1; font-style: italic } /* Generic.Emph */ | |
31 | .highlight .gr { color: #DC322F } /* Generic.Error */ | |
32 | .highlight .gh { color: #CB4B16 } /* Generic.Heading */ | |
33 | .highlight .gi { color: #859900 } /* Generic.Inserted */ | |
34 | .highlight .go { color: #93A1A1 } /* Generic.Output */ | |
35 | .highlight .gp { color: #93A1A1 } /* Generic.Prompt */ | |
36 | .highlight .gs { color: #93A1A1; font-weight: bold } /* Generic.Strong */ | |
37 | .highlight .gu { color: #CB4B16 } /* Generic.Subheading */ | |
38 | .highlight .gt { color: #93A1A1 } /* Generic.Traceback */ | |
39 | .highlight .kc { color: #CB4B16 } /* Keyword.Constant */ | |
40 | .highlight .kd { color: #268BD2 } /* Keyword.Declaration */ | |
41 | .highlight .kn { color: #859900 } /* Keyword.Namespace */ | |
42 | .highlight .kp { color: #859900 } /* Keyword.Pseudo */ | |
43 | .highlight .kr { color: #268BD2 } /* Keyword.Reserved */ | |
44 | .highlight .kt { color: #DC322F } /* Keyword.Type */ | |
45 | .highlight .ld { color: #93A1A1 } /* Literal.Date */ | |
46 | .highlight .m { color: #2AA198 } /* Literal.Number */ | |
47 | .highlight .s { color: #2AA198 } /* Literal.String */ | |
48 | .highlight .na { color: #93A1A1 } /* Name.Attribute */ | |
49 | .highlight .nb { color: #B58900 } /* Name.Builtin */ | |
50 | .highlight .nc { color: #268BD2 } /* Name.Class */ | |
51 | .highlight .no { color: #CB4B16 } /* Name.Constant */ | |
52 | .highlight .nd { color: #268BD2 } /* Name.Decorator */ | |
53 | .highlight .ni { color: #CB4B16 } /* Name.Entity */ | |
54 | .highlight .ne { color: #CB4B16 } /* Name.Exception */ | |
55 | .highlight .nf { color: #268BD2 } /* Name.Function */ | |
56 | .highlight .nl { color: #93A1A1 } /* Name.Label */ | |
57 | .highlight .nn { color: #93A1A1 } /* Name.Namespace */ | |
58 | .highlight .nx { color: #93A1A1 } /* Name.Other */ | |
59 | .highlight .py { color: #93A1A1 } /* Name.Property */ | |
60 | .highlight .nt { color: #268BD2 } /* Name.Tag */ | |
61 | .highlight .nv { color: #268BD2 } /* Name.Variable */ | |
62 | .highlight .ow { color: #859900 } /* Operator.Word */ | |
63 | .highlight .w { color: #93A1A1 } /* Text.Whitespace */ | |
64 | .highlight .mf { color: #2AA198 } /* Literal.Number.Float */ | |
65 | .highlight .mh { color: #2AA198 } /* Literal.Number.Hex */ | |
66 | .highlight .mi { color: #2AA198 } /* Literal.Number.Integer */ | |
67 | .highlight .mo { color: #2AA198 } /* Literal.Number.Oct */ | |
68 | .highlight .sb { color: #586E75 } /* Literal.String.Backtick */ | |
69 | .highlight .sc { color: #2AA198 } /* Literal.String.Char */ | |
70 | .highlight .sd { color: #93A1A1 } /* Literal.String.Doc */ | |
71 | .highlight .s2 { color: #2AA198 } /* Literal.String.Double */ | |
72 | .highlight .se { color: #CB4B16 } /* Literal.String.Escape */ | |
73 | .highlight .sh { color: #93A1A1 } /* Literal.String.Heredoc */ | |
74 | .highlight .si { color: #2AA198 } /* Literal.String.Interpol */ | |
75 | .highlight .sx { color: #2AA198 } /* Literal.String.Other */ | |
76 | .highlight .sr { color: #DC322F } /* Literal.String.Regex */ | |
77 | .highlight .s1 { color: #2AA198 } /* Literal.String.Single */ | |
78 | .highlight .ss { color: #2AA198 } /* Literal.String.Symbol */ | |
79 | .highlight .bp { color: #268BD2 } /* Name.Builtin.Pseudo */ | |
80 | .highlight .vc { color: #268BD2 } /* Name.Variable.Class */ | |
81 | .highlight .vg { color: #268BD2 } /* Name.Variable.Global */ | |
82 | .highlight .vi { color: #268BD2 } /* Name.Variable.Instance */ | |
83 | .highlight .il { color: #2AA198 } /* Literal.Number.Integer.Long */ |
Binary diff not shown
6 | 6 | ) |
7 | 7 | |
8 | 8 | # Local imports. |
9 | import sys | |
10 | ||
11 | from natsort.utils import chain_functions | |
12 | from natsort._version import __version__ | |
13 | ||
9 | 14 | from natsort.natsort import ( |
10 | 15 | natsort_key, |
11 | 16 | natsort_keygen, |
23 | 28 | as_utf8, |
24 | 29 | ns, |
25 | 30 | ) |
26 | from natsort._version import __version__ | |
31 | ||
32 | if float(sys.version[:3]) < 3: | |
33 | from natsort.natsort import natcmp | |
27 | 34 | |
28 | 35 | __all__ = [ |
29 | 36 | 'natsort_key', |
30 | 37 | 'natsort_keygen', |
31 | 38 | 'natsorted', |
32 | 'versorted' | |
39 | 'versorted', | |
33 | 40 | 'humansorted', |
34 | 41 | 'realsorted', |
35 | 42 | 'index_natsorted', |
38 | 45 | 'index_realsorted', |
39 | 46 | 'order_by_index', |
40 | 47 | 'decoder', |
48 | 'natcmp', | |
41 | 49 | 'as_ascii', |
42 | 50 | 'as_utf8', |
43 | 51 | 'ns', |
52 | 'chain_functions', | |
44 | 53 | ] |
54 | ||
55 | # Add the ns keys to this namespace for convenience. | |
56 | globals().update( | |
57 | dict((k, v) for k, v in vars(ns).items() if not k.startswith('_')) | |
58 | ) |
10 | 10 | |
11 | 11 | # Local imports. |
12 | 12 | from natsort.natsort import natsorted, ns |
13 | from natsort.utils import _regex_and_num_function_chooser | |
13 | from natsort.utils import _regex_chooser | |
14 | 14 | from natsort._version import __version__ |
15 | 15 | from natsort.compat.py23 import py23_str |
16 | 16 | |
102 | 102 | """\ |
103 | 103 | Verifies that the given range has a low lower than the high.
104 | 104 | If the condition is not met, a ValueError is raised. |
105 | Otherwise, the values are returned, but as floats. | |
106 | """ | |
107 | low, high = float(low), float(high) | |
105 | Otherwise the input is returned as-is. | |
106 | """ | |
108 | 107 | if low >= high: |
109 | 108 | raise ValueError('low >= high') |
110 | 109 | else: |
116 | 115 | Check that the low value of the filter is lower than the high. |
117 | 116 | If there is to be no filter, return 'None'. |
118 | 117 | If the condition is not met, a ValueError is raised. |
119 | Otherwise, the values are returned, but as floats. | |
118 | Otherwise, the values are returned as-is. | |
120 | 119 | """ |
121 | 120 | # Quick return if no filter. |
122 | 121 | if not filt: |
170 | 169 | if do_filter or args.exclude: |
171 | 170 | inp_options = (ns.FLOAT * is_float | |
172 | 171 | ns.SIGNED * signed | |
173 | ns.NOEXP * (not args.exp), | |
174 | '.' | |
172 | ns.NOEXP * (not args.exp) | |
175 | 173 | ) |
176 | regex, num_function = _regex_and_num_function_chooser[inp_options] | |
174 | regex = _regex_chooser[inp_options] | |
177 | 175 | if args.filter is not None: |
178 | 176 | lows, highs = ([f[0] for f in args.filter], |
179 | 177 | [f[1] for f in args.filter]) |
180 | 178 | entries = [entry for entry in entries |
181 | 179 | if keep_entry_range(entry, lows, highs, |
182 | num_function, regex)] | |
180 | float, regex)] | |
183 | 181 | if args.reverse_filter is not None: |
184 | 182 | lows, highs = ([f[0] for f in args.reverse_filter], |
185 | 183 | [f[1] for f in args.reverse_filter]) |
186 | 184 | entries = [entry for entry in entries |
187 | 185 | if not keep_entry_range(entry, lows, highs, |
188 | num_function, regex)] | |
186 | float, regex)] | |
189 | 187 | if args.exclude: |
190 | 188 | exclude = set(args.exclude) |
191 | 189 | entries = [entry for entry in entries |
192 | 190 | if exclude_entry(entry, exclude, |
193 | num_function, regex)] | |
191 | float, regex)] | |
194 | 192 | |
195 | 193 | # Print off the sorted results |
196 | 194 | for entry in natsorted(entries, reverse=args.reverse, alg=alg): |
11 | 11 | ) |
12 | 12 | |
13 | 13 | # Std. lib imports. |
14 | import sys | |
15 | import re | |
16 | 14 | import unicodedata |
17 | float_re = re.compile(r'[-+]?(\d*\.?\d+(?:[eE][-+]?\d+)?|inf(?:inity)?|nan)$') | |
18 | if sys.version[0] == '2': | |
19 | int_re = re.compile(r'[-+]?\d+[lL]?$') | |
20 | else: | |
21 | int_re = re.compile(r'[-+]?\d+$') | |
15 | from natsort.unicode_numbers import decimal_chars | |
16 | from natsort.compat.py23 import PY_VERSION | |
17 | if PY_VERSION >= 3: | |
22 | 18 | long = int |
23 | unicode = str | |
24 | 19 | |
25 | 20 | |
26 | def fast_float(x, regex_matcher=float_re.match, uni=unicodedata.numeric): | |
27 | """Convert a string to a float quickly""" | |
28 | if type(x) in (int, long, float): | |
29 | return float(x) | |
30 | elif regex_matcher(x): | |
31 | return float(x) | |
32 | elif type(x) == unicode and len(x) == 1 and uni(x, None) is not None: | |
33 | return uni(x) | |
34 | else: | |
35 | return x | |
21 | NAN_INF = ['INF', 'INf', 'Inf', 'inF', 'iNF', 'InF', 'inf', 'iNf', | |
22 | 'NAN', 'nan', 'NaN', 'nAn', 'naN', 'NAn', 'nAN', 'Nan'] | |
23 | NAN_INF.extend(['+'+x[:2] for x in NAN_INF] + ['-'+x[:2] for x in NAN_INF]) | |
24 | NAN_INF = frozenset(NAN_INF) | |
25 | ASCII_NUMS = '0123456789+-' | |
36 | 26 | |
37 | 27 | |
38 | def fast_int(x, regex_matcher=int_re.match, uni=unicodedata.digit): | |
28 | def fast_float(x, key=lambda x: x, nan=None, | |
29 | uni=unicodedata.numeric, nan_inf=NAN_INF, | |
30 | _first_char=frozenset(decimal_chars + list(ASCII_NUMS + '.'))): | |
31 | """\ | |
32 | Convert a string to a float quickly, return input as-is if not possible. | |
33 | We don't need to accept all input that the real fast_float accepts because | |
34 | the input will be controlled by the splitting algorithm. | |
35 | """ | |
36 | if x[0] in _first_char or x.lstrip()[:3] in nan_inf: | |
37 | try: | |
38 | x = float(x) | |
39 | return nan if nan is not None and x != x else x | |
40 | except ValueError: | |
41 | try: | |
42 | return uni(x, key(x)) if len(x) == 1 else key(x) | |
43 | except TypeError: # pragma: no cover | |
44 | return key(x) | |
45 | else: | |
46 | try: | |
47 | return uni(x, key(x)) if len(x) == 1 else key(x) | |
48 | except TypeError: # pragma: no cover | |
49 | return key(x) | |
50 | ||
51 | ||
52 | def fast_int(x, key=lambda x: x, nan=None, uni=unicodedata.digit, | |
53 | _first_char=frozenset(decimal_chars + list(ASCII_NUMS))): | |
39 | 54 | """\ |
40 | 55 | Convert a string to a int quickly, return input as-is if not possible. |
56 | We don't need to accept all input that the real fast_int accepts because | |
57 | the input will be controlled by the splitting algorithm. | |
41 | 58 | """ |
42 | if type(x) in (int, long, float): | |
43 | return int(x) | |
44 | elif regex_matcher(x): | |
45 | return int(x.rstrip('Ll')) | |
46 | elif type(x) == unicode and len(x) == 1 and uni(x, None) is not None: | |
47 | return uni(x) | |
59 | if x[0] in _first_char: | |
60 | try: | |
61 | return long(x) | |
62 | except ValueError: | |
63 | try: | |
64 | return uni(x, key(x)) if len(x) == 1 else key(x) | |
65 | except TypeError: # pragma: no cover | |
66 | return key(x) | |
48 | 67 | else: |
49 | return x | |
50 | ||
51 | ||
52 | def isfloat(x, num_only=False): | |
53 | """Returns true if the input is a float, false otherwise.""" | |
54 | return type(x) == float | |
55 | ||
56 | ||
57 | def isint(x, num_only=False): | |
58 | """Returns true if the input is an int, false otherwise.""" | |
59 | return type(x) in set([int, long]) | |
68 | try: | |
69 | return uni(x, key(x)) if len(x) == 1 else key(x) | |
70 | except TypeError: # pragma: no cover | |
71 | return key(x) |
5 | 5 | absolute_import |
6 | 6 | ) |
7 | 7 | |
8 | from distutils.version import StrictVersion | |
9 | ||
8 | 10 | # If the user has fastnumbers installed, they will get great speed |
9 | 11 | # benefits. If not, we use the simulated functions that come with natsort. |
10 | 12 | try: |
11 | 13 | from fastnumbers import ( |
12 | 14 | fast_float, |
13 | 15 | fast_int, |
14 | isint, | |
15 | isfloat, | |
16 | 16 | ) |
17 | 17 | import fastnumbers |
18 | v = list(map(int, fastnumbers.__version__.split('.'))) | |
19 | if not (v[0] >= 0 and v[1] >= 5): # Require >= version 0.5.0. | |
20 | raise ImportError | |
18 | # Require >= version 0.7.1. | |
19 | if StrictVersion(fastnumbers.__version__) < StrictVersion('0.7.1'): | |
20 | raise ImportError # pragma: no cover | |
21 | 21 | except ImportError: |
22 | 22 | from natsort.compat.fake_fastnumbers import ( |
23 | 23 | fast_float, |
24 | 24 | fast_int, |
25 | isint, | |
26 | isfloat, | |
27 | 25 | ) |
5 | 5 | absolute_import |
6 | 6 | ) |
7 | 7 | |
8 | # Std. lib imports | |
8 | # Std. lib imports. | |
9 | 9 | import sys |
10 | 10 | |
11 | 11 | # Local imports. |
12 | from natsort.compat.py23 import PY_VERSION, cmp_to_key | |
12 | from natsort.compat.py23 import ( | |
13 | PY_VERSION, | |
14 | cmp_to_key, | |
15 | py23_unichr, | |
16 | ) | |
17 | ||
18 | # This string should be sorted after any other byte string because | |
19 | # it contains the max unicode character repeated 20 times. | |
20 | # You would need some odd data to come after that. | |
21 | null_string = '' | |
22 | null_string_max = py23_unichr(sys.maxunicode) * 20 | |
13 | 23 | |
14 | 24 | # Make the strxfrm function from strcoll on Python2 |
15 | 25 | # It can be buggy (especially on BSD-based systems), |
16 | # so prefer PyICU if available. | |
26 | # so prefer icu if available. | |
17 | 27 | try: |
18 | import PyICU | |
28 | import icu | |
19 | 29 | from locale import getlocale |
20 | 30 | |
21 | # If using PyICU, get the locale from the current global locale, | |
22 | # then create a sort key from that | |
23 | def get_pyicu_transform(l, _d={}): | |
24 | if l not in _d: | |
25 | if l == (None, None): | |
26 | c = PyICU.Collator.createInstance(PyICU.Locale()) | |
27 | else: | |
28 | loc = '.'.join(l) | |
29 | c = PyICU.Collator.createInstance(PyICU.Locale(loc)) | |
30 | _d[l] = c.getSortKey | |
31 | return _d[l] | |
32 | use_pyicu = True | |
33 | null_string = b'' | |
31 | null_string_locale = b'' | |
32 | ||
33 | # This string should in theory be sorted after any other byte | |
34 | # string because it contains the max byte char repeated many times. | |
35 | # You would need some odd data to come after that. | |
36 | null_string_locale_max = b'x7f' * 50 | |
34 | 37 | |
35 | 38 | def dumb_sort(): |
36 | 39 | return False |
40 | ||
41 | # If using icu, get the locale from the current global locale, | |
42 | def get_icu_locale(): | |
43 | try: | |
44 | return icu.Locale('.'.join(getlocale())) | |
45 | except TypeError: # pragma: no cover | |
46 | return icu.Locale() | |
47 | ||
48 | def get_strxfrm(): | |
49 | return icu.Collator.createInstance(get_icu_locale()).getSortKey | |
50 | ||
51 | def get_thousands_sep(): | |
52 | sep = icu.DecimalFormatSymbols.kGroupingSeparatorSymbol | |
53 | return icu.DecimalFormatSymbols(get_icu_locale()).getSymbol(sep) | |
54 | ||
55 | def get_decimal_point(): | |
56 | sep = icu.DecimalFormatSymbols.kDecimalSeparatorSymbol | |
57 | return icu.DecimalFormatSymbols(get_icu_locale()).getSymbol(sep) | |
58 | ||
37 | 59 | except ImportError: |
38 | if sys.version[0] == '2': | |
60 | import locale | |
61 | if PY_VERSION < 3: | |
39 | 62 | from locale import strcoll |
40 | strxfrm = cmp_to_key(strcoll) | |
41 | null_string = strxfrm('') | |
63 | sentinel = object() | |
64 | ||
65 | def custom_strcoll(a, b, last=sentinel): | |
66 | """strcoll that can handle a sentinel that is always last.""" | |
67 | if a is last: | |
68 | return 0 if a is b else 1 | |
69 | elif b is last: # a cannot also be sentinel b/c above logic | |
70 | return -1 | |
71 | else: # neither are sentinel | |
72 | return strcoll(a, b) | |
73 | ||
74 | strxfrm = cmp_to_key(custom_strcoll) | |
75 | null_string_locale = strxfrm('') | |
76 | null_string_locale_max = strxfrm(sentinel) | |
42 | 77 | else: |
43 | 78 | from locale import strxfrm |
44 | null_string = '' | |
45 | use_pyicu = False | |
79 | null_string_locale = '' | |
80 | ||
81 | # This string should be sorted after any other byte string because | |
82 | # it contains the max unicode character repeated 20 times. | |
83 | # You would need some odd data to come after that. | |
84 | null_string_locale_max = py23_unichr(sys.maxunicode) * 20 | |
46 | 85 | |
47 | 86 | # On some systems, locale is broken and does not sort in the expected |
48 | 87 | # order. We will try to detect this and compensate. |
49 | 88 | def dumb_sort(): |
50 | 89 | return strxfrm('A') < strxfrm('a') |
51 | 90 | |
91 | def get_strxfrm(): | |
92 | return strxfrm | |
52 | 93 | |
53 | if PY_VERSION >= 3.3: | |
54 | def _low(x): | |
55 | return x.casefold() | |
56 | else: | |
57 | def _low(x): | |
58 | return x.lower() | |
94 | def get_thousands_sep(): | |
95 | sep = locale.localeconv()['thousands_sep'] | |
96 | # If this locale library is broken, some of the thousands separator | |
97 | # characters are incorrectly blank. Here is a lookup table of the | |
98 | # corrections I am aware of. | |
99 | if dumb_sort(): | |
100 | try: | |
101 | loc = '.'.join(locale.getlocale()) | |
102 | except TypeError: # No locale loaded, default to ',' | |
103 | return ',' | |
104 | return {'de_DE.ISO8859-15': '.', | |
105 | 'es_ES.ISO8859-1': '.', | |
106 | 'de_AT.ISO8859-1': '.', | |
107 | 'de_at': '\xa0', | |
108 | 'nl_NL.UTF-8': '.', | |
109 | 'es_es': '.', | |
110 | 'fr_CH.ISO8859-15': '\xa0', | |
111 | 'fr_CA.ISO8859-1': '\xa0', | |
112 | 'de_CH.ISO8859-1': '.', | |
113 | 'fr_FR.ISO8859-15': '\xa0', | |
114 | 'nl_NL.ISO8859-1': '.', | |
115 | 'ca_ES.UTF-8': '.', | |
116 | 'nl_NL.ISO8859-15': '.', | |
117 | 'de_ch': "'", | |
118 | 'ca_es': '.', | |
119 | 'de_AT.ISO8859-15': '.', | |
120 | 'ca_ES.ISO8859-1': '.', | |
121 | 'de_AT.UTF-8': '.', | |
122 | 'es_ES.UTF-8': '.', | |
123 | 'fr_fr': '\xa0', | |
124 | 'es_ES.ISO8859-15': '.', | |
125 | 'de_DE.ISO8859-1': '.', | |
126 | 'nl_nl': '.', | |
127 | 'fr_ch': '\xa0', | |
128 | 'fr_ca': '\xa0', | |
129 | 'de_DE.UTF-8': '.', | |
130 | 'ca_ES.ISO8859-15': '.', | |
131 | 'de_CH.ISO8859-15': '.', | |
132 | 'fr_FR.ISO8859-1': '\xa0', | |
133 | 'fr_CH.ISO8859-1': '\xa0', | |
134 | 'de_de': '.', | |
135 | 'fr_FR.UTF-8': '\xa0', | |
136 | 'fr_CA.ISO8859-15': '\xa0', | |
137 | }.get(loc, sep) | |
138 | else: | |
139 | return sep | |
140 | ||
141 | def get_decimal_point(): | |
142 | return locale.localeconv()['decimal_point'] |
15 | 15 | |
16 | 16 | # Numeric form of version |
17 | 17 | PY_VERSION = float(sys.version[:3]) |
18 | NEWPY = PY_VERSION >= 3.3 | |
18 | 19 | |
19 | 20 | # Assume all strings are Unicode in Python 2 |
20 | 21 | py23_str = str if sys.version[0] == '3' else unicode |
28 | 29 | # unichr function |
29 | 30 | py23_unichr = chr if sys.version[0] == '3' else unichr |
30 | 31 | |
32 | ||
33 | def _py23_cmp(a, b): | |
34 | return (a > b) - (a < b) | |
35 | ||
36 | ||
37 | py23_cmp = _py23_cmp if sys.version[0] == '3' else cmp | |
38 | ||
31 | 39 | # zip, map, and filter as iterators |
32 | 40 | if sys.version[0] == '3': |
33 | 41 | py23_zip = zip |
42 | py23_map = map | |
43 | py23_filter = filter | |
34 | 44 | else: |
35 | 45 | import itertools |
36 | 46 | py23_zip = itertools.izip |
47 | py23_map = itertools.imap | |
48 | py23_filter = itertools.ifilter | |
37 | 49 | |
38 | 50 | |
39 | 51 | # cmp_to_key was not created till 2.7, so require this for 2.6 |
84 | 96 | func = func_or_str |
85 | 97 | doc = func.__doc__ |
86 | 98 | |
87 | doc = str_change_func(doc) | |
99 | if doc is not None: | |
100 | doc = str_change_func(doc) | |
88 | 101 | |
89 | 102 | if func: |
90 | 103 | func.__doc__ = doc |
0 | # -*- coding: utf-8 -*- | |
1 | """\ | |
2 | This module is intended to help combine some locale functions | |
3 | together for natsort consumption. It also accounts for Python2 | |
4 | and Python3 differences. | |
5 | """ | |
6 | from __future__ import ( | |
7 | print_function, | |
8 | division, | |
9 | unicode_literals, | |
10 | absolute_import | |
11 | ) | |
12 | ||
13 | # Std. lib imports. | |
14 | from itertools import chain | |
15 | from locale import localeconv | |
16 | ||
17 | # Local imports. | |
18 | from natsort.compat.locale import use_pyicu, _low | |
19 | if use_pyicu: | |
20 | from natsort.compat.locale import get_pyicu_transform, getlocale | |
21 | else: | |
22 | from natsort.compat.locale import strxfrm | |
23 | ||
24 | ||
25 | def groupletters(x): | |
26 | """Double all characters, making doubled letters lowercase.""" | |
27 | return ''.join(chain.from_iterable([_low(y), y] for y in x)) | |
28 | ||
29 | ||
30 | def grouper(val, func): | |
31 | """\ | |
32 | Attempt to convert a string to a number. If the conversion | |
33 | was not possible, run it through the letter grouper | |
34 | to make the sorting work as requested. | |
35 | """ | |
36 | # Return the number or transformed string. | |
37 | # If the input is identical to the output, then no conversion happened. | |
38 | s = func[0](val) | |
39 | return groupletters(s) if not func[1](s) else s | |
40 | ||
41 | ||
42 | def locale_convert(val, func, group): | |
43 | """\ | |
44 | Attempt to convert a string to a number, first converting | |
45 | the decimal place character if needed. Then, if the conversion | |
46 | was not possible (i.e. it is not a number), run it through | |
47 | strxfrm to make the sorting work as requested, possibly grouping first. | |
48 | """ | |
49 | ||
50 | # Format the number so that the conversion function can interpret it. | |
51 | radix = localeconv()['decimal_point'] | |
52 | s = val.replace(radix, '.') if radix != '.' else val | |
53 | ||
54 | # Perform the conversion | |
55 | t = func[0](s) | |
56 | ||
57 | # Return the number or transformed string. | |
58 | # If the input is identical to the output, then no conversion happened. | |
59 | # In this case, we don't want to return the function output because it | |
60 | # may have had characters modified from the above 'replace' call, | |
61 | # so we return the input. | |
62 | if group: | |
63 | if use_pyicu: | |
64 | xfrm = get_pyicu_transform(getlocale()) | |
65 | return xfrm(groupletters(val)) if not func[1](t) else t | |
66 | else: | |
67 | return strxfrm(groupletters(val)) if not func[1](t) else t | |
68 | else: | |
69 | if use_pyicu: | |
70 | xfrm = get_pyicu_transform(getlocale()) | |
71 | return xfrm(val) if not func[1](t) else t | |
72 | else: | |
73 | return strxfrm(val) if not func[1](t) else t |
17 | 17 | ) |
18 | 18 | |
19 | 19 | # Std lib. imports. |
20 | import re | |
21 | 20 | from operator import itemgetter |
22 | 21 | from functools import partial |
23 | 22 | from warnings import warn |
24 | 23 | |
25 | 24 | # Local imports. |
25 | import sys | |
26 | ||
27 | import natsort.compat.locale | |
26 | 28 | from natsort.ns_enum import ns |
27 | from natsort.compat.py23 import u_format | |
29 | from natsort.compat.py23 import ( | |
30 | u_format, | |
31 | py23_str, | |
32 | py23_cmp) | |
28 | 33 | from natsort.utils import ( |
29 | 34 | _natsort_key, |
30 | 35 | _args_to_enum, |
31 | 36 | _do_decoding, |
37 | _regex_chooser, | |
38 | _parse_string_factory, | |
39 | _parse_path_factory, | |
40 | _parse_number_factory, | |
41 | _parse_bytes_factory, | |
42 | _input_string_transform_factory, | |
43 | _string_component_transform_factory, | |
44 | _final_data_transform_factory, | |
32 | 45 | ) |
33 | 46 | |
34 | 47 | # Make sure the doctest works for either python2 or python3 |
66 | 79 | True |
67 | 80 | >>> f(12345) == 12345 |
68 | 81 | True |
82 | >>> # On Python 3, without decoder this would return [b'a10', b'a2'] | |
69 | 83 | >>> natsorted([b'a10', b'a2'], key=decoder('utf8')) == [b'a2', b'a10'] |
70 | 84 | True |
71 | >>> # On Python 3, without decoder this would return [b'a10', b'a2'] | |
85 | >>> # On Python 3, without decoder this would raise a TypeError. | |
72 | 86 | >>> natsorted([b'a10', 'a2'], key=decoder('utf8')) == ['a2', b'a10'] |
73 | 87 | True |
74 | >>> # On Python 3, without decoder this would raise a TypeError. | |
75 | 88 | |
76 | 89 | """ |
77 | 90 | return partial(_do_decoding, encoding=encoding) |
131 | 144 | """Undocumented, kept for backwards-compatibility.""" |
132 | 145 | msg = "natsort_key is deprecated as of 3.4.0, please use natsort_keygen" |
133 | 146 | warn(msg, DeprecationWarning) |
134 | return _natsort_key(val, key, _args_to_enum(**_kwargs) | alg) | |
147 | return natsort_keygen(key, alg, **_kwargs)(val) | |
135 | 148 | |
136 | 149 | |
137 | 150 | @u_format |
145 | 158 | |
146 | 159 | The user may customize the generated function with the |
147 | 160 | arguments to `natsort_keygen`, including an optional |
148 | `key` function which will be called before the `natsort_key`. | |
161 | `key` function. | |
149 | 162 | |
150 | 163 | Parameters |
151 | 164 | ---------- |
162 | 175 | Returns |
163 | 176 | ------- |
164 | 177 | out : function |
165 | A wrapped version of the `natsort_key` function that is | |
178 | A function that parses input for natural sorting that is | |
166 | 179 | suitable for passing as the `key` argument to functions |
167 | 180 | such as `sorted`. |
168 | 181 | |
173 | 186 | Examples |
174 | 187 | -------- |
175 | 188 | `natsort_keygen` is a convenient way to create a custom key |
176 | to sort lists in-place (for example). Calling with no objects | |
177 | will return a plain `natsort_key` instance:: | |
189 | to sort lists in-place (for example):: | |
178 | 190 | |
179 | 191 | >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] |
180 | 192 | >>> a.sort(key=natsort_keygen(alg=ns.REAL)) |
182 | 194 | [{u}'num-3', {u}'num2', {u}'num5.10', {u}'num5.3'] |
183 | 195 | |
184 | 196 | """ |
185 | return partial(_natsort_key, key=key, alg=_args_to_enum(**_kwargs) | alg) | |
197 | # Transform old arguments to the ns enum. | |
198 | try: | |
199 | alg = _args_to_enum(**_kwargs) | alg | |
200 | except TypeError: | |
201 | msg = "natsort_keygen: 'alg' argument must be from the enum 'ns'" | |
202 | raise ValueError(msg+', got {0}'.format(py23_str(alg))) | |
203 | ||
204 | # Add the _DUMB option if the locale library is broken. | |
205 | if alg & ns.LOCALEALPHA and natsort.compat.locale.dumb_sort(): | |
206 | alg |= ns._DUMB | |
207 | ||
208 | # Set some variables that will be passed to the factory functions | |
209 | if alg & ns.NUMAFTER: | |
210 | if alg & ns.LOCALEALPHA: | |
211 | sep = natsort.compat.locale.null_string_locale_max | |
212 | else: | |
213 | sep = natsort.compat.locale.null_string_max | |
214 | pre_sep = natsort.compat.locale.null_string_max | |
215 | else: | |
216 | if alg & ns.LOCALEALPHA: | |
217 | sep = natsort.compat.locale.null_string_locale | |
218 | else: | |
219 | sep = natsort.compat.locale.null_string | |
220 | pre_sep = natsort.compat.locale.null_string | |
221 | regex = _regex_chooser[alg & ns._NUMERIC_ONLY] | |
222 | ||
223 | # Create the functions that will be used to split strings. | |
224 | input_transform = _input_string_transform_factory(alg) | |
225 | component_transform = _string_component_transform_factory(alg) | |
226 | final_transform = _final_data_transform_factory(alg, sep, pre_sep) | |
227 | ||
228 | # Create the high-level parsing functions for strings, bytes, and numbers. | |
229 | string_func = _parse_string_factory( | |
230 | alg, sep, regex.split, | |
231 | input_transform, component_transform, final_transform | |
232 | ) | |
233 | if alg & ns.PATH: | |
234 | string_func = _parse_path_factory(string_func) | |
235 | bytes_func = _parse_bytes_factory(alg) | |
236 | num_func = _parse_number_factory(alg, sep, pre_sep) | |
237 | ||
238 | # Return the natsort key with the parsing path pre-chosen. | |
239 | return partial( | |
240 | _natsort_key, | |
241 | key=key, | |
242 | string_func=string_func, | |
243 | bytes_func=bytes_func, | |
244 | num_func=num_func | |
245 | ) | |
186 | 246 | |
187 | 247 | |
188 | 248 | @u_format |
189 | 249 | def natsorted(seq, key=None, reverse=False, alg=0, **_kwargs): |
190 | 250 | """\ |
191 | Sorts a sequence naturally. | |
192 | ||
193 | Sorts a sequence naturally (alphabetically and numerically), | |
194 | not lexicographically. Returns a new copy of the sorted | |
195 | sequence as a list. | |
251 | Sorts an iterable naturally. | |
252 | ||
253 | Sorts an iterable naturally (alphabetically and numerically), | |
254 | not lexicographically. Returns a list containing a sorted copy | |
255 | of the iterable. | |
256 | ||
257 | Parameters | |
258 | ---------- | |
259 | seq : iterable | |
260 | The iterable to sort. | |
261 | ||
262 | key : callable, optional | |
263 | A key used to determine how to sort each element of the iterable. | |
264 | It is **not** applied recursively. | |
265 | It should accept a single argument and return a single value. | |
266 | ||
267 | reverse : {{True, False}}, optional | |
268 | Return the list in reversed sorted order. The default is | |
269 | `False`. | |
270 | ||
271 | alg : ns enum, optional | |
272 | This option is used to control which algorithm `natsort` | |
273 | uses when sorting. For details into these options, please see | |
274 | the :class:`ns` class documentation. The default is `ns.INT`. | |
275 | ||
276 | Returns | |
277 | ------- | |
278 | out: list | |
279 | The sorted sequence. | |
280 | ||
281 | See Also | |
282 | -------- | |
283 | natsort_keygen : Generates the key that makes natural sorting possible. | |
284 | realsorted : A wrapper for ``natsorted(seq, alg=ns.REAL)``. | |
285 | humansorted : A wrapper for ``natsorted(seq, alg=ns.LOCALE)``. | |
286 | index_natsorted : Returns the sorted indexes from `natsorted`. | |
287 | ||
288 | Examples | |
289 | -------- | |
290 | Use `natsorted` just like the builtin `sorted`:: | |
291 | ||
292 | >>> a = ['num3', 'num5', 'num2'] | |
293 | >>> natsorted(a) | |
294 | [{u}'num2', {u}'num3', {u}'num5'] | |
295 | ||
296 | """ | |
297 | natsort_key = natsort_keygen(key, alg, **_kwargs) | |
298 | return sorted(seq, reverse=reverse, key=natsort_key) | |
299 | ||
300 | ||
301 | @u_format | |
302 | def versorted(seq, key=None, reverse=False, alg=0, **_kwargs): | |
303 | """\ | |
304 | Identical to :func:`natsorted`. | |
305 | ||
306 | This function exists for backwards compatibility with `natsort` | |
307 | version < 4.0.0. Future development should use :func:`natsorted`. | |
308 | ||
309 | See Also | |
310 | -------- | |
311 | natsorted | |
312 | ||
313 | """ | |
314 | return natsorted(seq, key, reverse, alg, **_kwargs) | |
315 | ||
316 | ||
317 | @u_format | |
318 | def humansorted(seq, key=None, reverse=False, alg=0): | |
319 | """\ | |
320 | Convenience function to properly sort non-numeric characters. | |
321 | ||
322 | Convenience function to properly sort non-numeric characters | |
323 | in a locale-aware fashion (a.k.a "human sorting"). This is a | |
324 | wrapper around ``natsorted(seq, alg=ns.LOCALE)``. | |
196 | 325 | |
197 | 326 | Parameters |
198 | 327 | ---------- |
211 | 340 | alg : ns enum, optional |
212 | 341 | This option is used to control which algorithm `natsort` |
213 | 342 | uses when sorting. For details into these options, please see |
214 | the :class:`ns` class documentation. The default is `ns.INT`. | |
215 | ||
216 | Returns | |
217 | ------- | |
218 | out: list | |
219 | The sorted sequence. | |
220 | ||
221 | See Also | |
222 | -------- | |
223 | natsort_keygen : Generates the key that makes natural sorting possible. | |
224 | realsorted : A wrapper for ``natsorted(seq, alg=ns.REAL)``. | |
225 | humansorted : A wrapper for ``natsorted(seq, alg=ns.LOCALE)``. | |
226 | index_natsorted : Returns the sorted indexes from `natsorted`. | |
227 | ||
228 | Examples | |
229 | -------- | |
230 | Use `natsorted` just like the builtin `sorted`:: | |
231 | ||
232 | >>> a = ['num3', 'num5', 'num2'] | |
233 | >>> natsorted(a) | |
234 | [{u}'num2', {u}'num3', {u}'num5'] | |
235 | ||
236 | """ | |
237 | alg = _args_to_enum(**_kwargs) | alg | |
238 | try: | |
239 | return sorted(seq, reverse=reverse, key=natsort_keygen(key, alg=alg)) | |
240 | except TypeError as e: # pragma: no cover | |
241 | # In the event of an unresolved "unorderable types" error | |
242 | # for string to number type comparisons (not str/bytes), | |
243 | # attempt to sort again, being careful to prevent this error. | |
244 | r = re.compile(r'(?:str|bytes)\(\) [<>] (?:str|bytes)\(\)') | |
245 | if 'unorderable types' in str(e) and not r.search(str(e)): | |
246 | return sorted(seq, reverse=reverse, | |
247 | key=natsort_keygen(key, | |
248 | alg=alg | ns.TYPESAFE)) | |
249 | else: | |
250 | # Re-raise if the problem was not "unorderable types" | |
251 | raise | |
252 | ||
253 | ||
254 | @u_format | |
255 | def versorted(seq, key=None, reverse=False, alg=0, **_kwargs): | |
256 | """\ | |
257 | Identical to :func:`natsorted`. | |
258 | ||
259 | This function exists for backwards compatibility with `natsort` | |
260 | version < 4.0.0. Future development should use :func:`natsorted`. | |
261 | ||
262 | Please see the :func:`natsorted` documentation for use. | |
263 | ||
264 | See Also | |
265 | -------- | |
266 | natsorted | |
267 | ||
268 | """ | |
269 | return natsorted(seq, key, reverse, alg, **_kwargs) | |
270 | ||
271 | ||
272 | @u_format | |
273 | def humansorted(seq, key=None, reverse=False, alg=0): | |
274 | """\ | |
275 | Convenience function to properly sort non-numeric characters. | |
276 | ||
277 | Convenience function to properly sort non-numeric characters | |
278 | in a locale-aware fashion (a.k.a "human sorting"). This is a | |
279 | wrapper around ``natsorted(seq, alg=ns.LOCALE)``. | |
280 | ||
281 | .. warning:: On BSD-based systems (like Mac OS X), the underlying | |
282 | C library that Python's locale module uses is broken. | |
283 | On these systems it is recommended that you install | |
284 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
285 | if you wish to use ``humansorted``, especially if you need | |
286 | to handle non-ASCII characters. If you are on | |
287 | one of these systems and get unexpected results, please try | |
288 | using `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
289 | before filing a bug report to `natsort`. | |
290 | ||
291 | Parameters | |
292 | ---------- | |
293 | seq : iterable | |
294 | The sequence to sort. | |
295 | ||
296 | key : callable, optional | |
297 | A key used to determine how to sort each element of the sequence. | |
298 | It is **not** applied recursively. | |
299 | It should accept a single argument and return a single value. | |
300 | ||
301 | reverse : {{True, False}}, optional | |
302 | Return the list in reversed sorted order. The default is | |
303 | `False`. | |
304 | ||
305 | alg : ns enum, optional | |
306 | This option is used to control which algorithm `natsort` | |
307 | uses when sorting. For details into these options, please see | |
308 | 343 | the :class:`ns` class documentation. The default is `ns.LOCALE`. |
309 | 344 | |
310 | 345 | Returns |
318 | 353 | |
319 | 354 | Notes |
320 | 355 | ----- |
321 | You may find that if you do not explicitly set | |
322 | the locale your results may not be as you expect, although | |
323 | as of ``natsort`` version 4.0.0 the sorting algorithm has been | |
324 | updated to account for a buggy ``locale`` installation. | |
325 | In the below example 'en_US.UTF-8' is used, but you should use your | |
326 | locale:: | |
327 | ||
328 | >>> import locale | |
329 | >>> # The 'str' call is only to get around a bug on Python 2.x | |
330 | >>> # where 'setlocale' does not expect unicode strings (ironic, | |
331 | >>> # right?) | |
332 | >>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) | |
333 | 'en_US.UTF-8' | |
334 | ||
335 | It is preferred that you do this before importing `natsort`. | |
336 | If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning | |
337 | above) then you should not need to do explicitly set a locale. | |
356 | Please read :ref:`locale_issues` before using `humansorted`. | |
338 | 357 | |
339 | 358 | Examples |
340 | 359 | -------- |
462 | 481 | [{u}'baz', {u}'foo', {u}'bar'] |
463 | 482 | |
464 | 483 | """ |
465 | alg = _args_to_enum(**_kwargs) | alg | |
466 | 484 | if key is None: |
467 | 485 | newkey = itemgetter(1) |
468 | 486 | else: |
470 | 488 | return key(itemgetter(1)(x)) |
471 | 489 | # Pair the index and sequence together, then sort by element |
472 | 490 | index_seq_pair = [[x, y] for x, y in enumerate(seq)] |
473 | try: | |
474 | index_seq_pair.sort(reverse=reverse, | |
475 | key=natsort_keygen(newkey, alg=alg)) | |
476 | except TypeError as e: # pragma: no cover | |
477 | # In the event of an unresolved "unorderable types" error | |
478 | # attempt to sort again, being careful to prevent this error. | |
479 | if 'unorderable types' in str(e): | |
480 | index_seq_pair.sort(reverse=reverse, | |
481 | key=natsort_keygen(newkey, | |
482 | alg=alg | ns.TYPESAFE)) | |
483 | else: | |
484 | # Re-raise if the problem was not "unorderable types" | |
485 | raise | |
491 | index_seq_pair.sort(reverse=reverse, | |
492 | key=natsort_keygen(newkey, alg, **_kwargs)) | |
486 | 493 | return [x for x, _ in index_seq_pair] |
487 | 494 | |
488 | 495 | |
517 | 524 | of the given sequence. |
518 | 525 | |
519 | 526 | This is a wrapper around ``index_natsorted(seq, alg=ns.LOCALE)``. |
520 | Please see the ``humansorted`` documentation for caveats of | |
521 | using ``index_humansorted``. | |
522 | 527 | |
523 | 528 | Parameters |
524 | 529 | ---------- |
551 | 556 | |
552 | 557 | Notes |
553 | 558 | ----- |
554 | You may find that if you do not explicitly set | |
555 | the locale your results may not be as you expect, although | |
556 | as of ``natsort`` version 4.0.0 the sorting algorithm has been | |
557 | updated to account for a buggy ``locale`` installation. | |
558 | In the below example 'en_US.UTF-8' is used, but you should use your | |
559 | locale:: | |
560 | ||
561 | >>> import locale | |
562 | >>> # The 'str' call is only to get around a bug on Python 2.x | |
563 | >>> # where 'setlocale' does not expect unicode strings (ironic, | |
564 | >>> # right?) | |
565 | >>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) | |
566 | 'en_US.UTF-8' | |
567 | ||
568 | It is preferred that you do this before importing `natsort`. | |
569 | If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning | |
570 | above) then you should not need to explicitly set a locale. | |
559 | Please read :ref:`locale_issues` before using `humansorted`. | |
571 | 560 | |
572 | 561 | Examples |
573 | 562 | -------- |
644 | 633 | """\ |
645 | 634 | Order a given sequence by an index sequence. |
646 | 635 | |
647 | The output of `index_natsorted` and `index_versorted` is a | |
636 | The output of `index_natsorted` is a | |
648 | 637 | sequence of integers (index) that correspond to how its input |
649 | 638 | sequence **would** be sorted. The idea is that this index can |
650 | 639 | be used to reorder multiple sequences by the sorted order of the |
653 | 642 | |
654 | 643 | Parameters |
655 | 644 | ---------- |
656 | seq : iterable | |
645 | seq : sequence | |
657 | 646 | The sequence to order. |
658 | 647 | |
659 | 648 | index : iterable |
660 | The sequence that indicates how to order `seq`. | |
649 | The iterable that indicates how to order `seq`. | |
661 | 650 | It should be the same length as `seq` and consist |
662 | 651 | of integers only. |
663 | 652 | |
664 | 653 | iter : {{True, False}}, optional |
665 | 654 | If `True`, the ordered sequence is returned as an |
666 | generator expression; otherwise it is returned as a | |
655 | iterator; otherwise it is returned as a | |
667 | 656 | list. The default is `False`. |
668 | 657 | |
669 | 658 | Returns |
670 | 659 | ------- |
671 | out : {{list, generator}} | |
672 | The sequence ordered by `index`, as a `list` or as a | |
673 | generator expression (depending on the value of `iter`). | |
660 | out : {{list, iterator}} | |
661 | The sequence ordered by `index`, as a `list` or as an | |
662 | iterator (depending on the value of `iter`). | |
674 | 663 | |
675 | 664 | See Also |
676 | 665 | -------- |
677 | 666 | index_natsorted |
678 | index_versorted | |
679 | 667 | index_humansorted |
680 | 668 | index_realsorted |
681 | 669 | |
683 | 671 | -------- |
684 | 672 | |
685 | 673 | `order_by_index` is a convenience function that helps you apply |
686 | the result of `index_natsorted` or `index_versorted`:: | |
674 | the result of `index_natsorted`:: | |
687 | 675 | |
688 | 676 | >>> a = ['num3', 'num5', 'num2'] |
689 | 677 | >>> b = ['foo', 'bar', 'baz'] |
698 | 686 | |
699 | 687 | """ |
700 | 688 | return (seq[i] for i in index) if iter else [seq[i] for i in index] |
689 | ||
690 | ||
691 | if float(sys.version[:3]) < 3: | |
692 | # pylint: disable=unused-variable | |
693 | class natcmp(object): | |
694 | """ | |
695 | Compare two objects using a key and an algorithm. | |
696 | ||
697 | Parameters | |
698 | ---------- | |
699 | x : object | |
700 | First object to compare. | |
701 | ||
702 | y : object | |
703 | Second object to compare. | |
704 | ||
705 | alg : ns enum, optional | |
706 | This option is used to control which algorithm `natsort` | |
707 | uses when sorting. For details into these options, please see | |
708 | the :class:`ns` class documentation. The default is `ns.INT`. | |
709 | ||
710 | Returns | |
711 | ------- | |
712 | out: int | |
713 | 0 if x and y are equal, 1 if x > y, -1 if y > x. | |
714 | ||
715 | See Also | |
716 | -------- | |
717 | natsort_keygen : Generates a key that makes natural sorting possible. | |
718 | ||
719 | Examples | |
720 | -------- | |
721 | Use `natcmp` just like the builtin `cmp`:: | |
722 | ||
723 | >>> one = 1 | |
724 | >>> two = 2 | |
725 | >>> natcmp(one, two) | |
726 | -1 | |
727 | """ | |
728 | cached_keys = {} | |
729 | ||
730 | def __new__(cls, x, y, alg=0, *args, **kwargs): | |
731 | try: | |
732 | alg = _args_to_enum(**kwargs) | alg | |
733 | except TypeError: | |
734 | msg = ("natsort_keygen: 'alg' argument must be " | |
735 | "from the enum 'ns'") | |
736 | raise ValueError(msg + ', got {0}'.format(py23_str(alg))) | |
737 | ||
738 | # Add the _DUMB option if the locale library is broken. | |
739 | if alg & ns.LOCALEALPHA and natsort.compat.locale.dumb_sort(): | |
740 | alg |= ns._DUMB | |
741 | ||
742 | if alg not in cls.cached_keys: | |
743 | cls.cached_keys[alg] = natsort_keygen(alg=alg) | |
744 | ||
745 | return py23_cmp(cls.cached_keys[alg](x), cls.cached_keys[alg](y)) |
14 | 14 | This class acts like an enum to control the `natsort` algorithm. The |
15 | 15 | user may select several options simultaneously by or'ing the options |
16 | 16 | together. For example, to choose ``ns.INT``, ``ns.PATH``, and |
17 | ``ns.LOCALE``, you could do ``ns.INT | ns.LOCALE | ns.PATH``. | |
17 | ``ns.LOCALE``, you could do ``ns.INT | ns.LOCALE | ns.PATH``. Each | |
18 | function in the :mod:`natsort` package has an `alg` option that accepts | |
19 | this enum to allow fine control over how your input is sorted. | |
18 | 20 | |
19 | 21 | Each option has a shortened 1- or 2-letter form. |
20 | 22 | |
21 | .. warning:: On BSD-based systems (like Mac OS X), the underlying | |
22 | C library that Python's locale module uses is broken. | |
23 | On these systems it is recommended that you install | |
24 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ | |
25 | if you wish to use ``LOCALE``, especially if you need | |
26 | to handle non-ASCII characters. If you are on one of | |
27 | these systems and get unexpected results, please try using | |
28 | `PyICU <https://pypi.python.org/pypi/PyICU>`_ before | |
29 | filing a bug report to ``natsort``. | |
23 | .. note:: Please read :ref:`locale_issues` before using ``ns.LOCALE``. | |
30 | 24 | |
31 | 25 | Attributes |
32 | 26 | ---------- |
36 | 30 | Tell `natsort` to parse numbers as floats. |
37 | 31 | UNSIGNED, U (default) |
38 | 32 | Tell `natsort` to ignore any sign (i.e. "-" or "+") to the immediate |
39 | left of a number. It is the same as setting the old `signed` option | |
40 | to `False`. This is the default. | |
33 | left of a number. This is the default. | |
41 | 34 | SIGNED, S |
42 | 35 | Tell `natsort` to take into account any sign (i.e. "-" or "+") |
43 | to the immediate left of a number. It is the same as setting | |
44 | the old `signed` option to `True`. | |
45 | VERSION, V | |
46 | This is a shortcut for ``ns.INT | ns.UNSIGNED``, which is useful | |
47 | when attempting to sort version numbers. It is the same as | |
48 | setting the old `number_type` option to `None`. Since | |
49 | ``ns.INT | ns.UNSIGNED`` is the default, this is | |
50 | unnecessary. | |
51 | DIGIT, D | |
52 | Same as `VERSION` above. | |
36 | to the immediate left of a number. | |
53 | 37 | REAL, R |
54 | 38 | This is a shortcut for ``ns.FLOAT | ns.SIGNED``, which is useful |
55 | 39 | when attempting to sort real numbers. |
56 | 40 | NOEXP, N |
57 | Tell `natsort` to not search for exponents as part of the number. | |
41 | Tell `natsort` to not search for exponents as part of a float number. | |
58 | 42 | For example, with `NOEXP` the number "5.6E5" would be interpreted |
59 | as `5.6`, `"E"`, and `5`. It is the same as setting the old | |
60 | `exp` option to `False`. | |
43 | as `5.6`, `"E"`, and `5` instead of `560000`. | |
44 | NUMAFTER, NA | |
45 | Tell `natsort` to sort numbers after non-numbers. By default | |
46 | numbers will be ordered before non-numbers. | |
61 | 47 | PATH, P |
62 | 48 | Tell `natsort` to interpret strings as filesystem paths, so they |
63 | 49 | will be split according to the filesystem separator |
67 | 53 | sorted properly; 'Folder/' will be placed at the end, not at the |
68 | 54 | front. It is the same as setting the old `as_path` option to |
69 | 55 | `True`. |
56 | COMPATIBILITYNORMALIZE, CN | |
57 | Use the "NFKD" unicode normalization form on input rather than the | |
58 | default "NFD". This will transform characters such as '⑦' into | |
59 | '7'. Please see https://stackoverflow.com/a/7934397/1399279, | |
60 | https://stackoverflow.com/a/7931547/1399279, | |
61 | and http://unicode.org/reports/tr15/ for full details into unicode | |
62 | normalization. | |
70 | 63 | LOCALE, L |
71 | Tell `natsort` to be locale-aware when sorting strings (everything | |
72 | that was not converted to a number). Your sorting results will vary | |
73 | depending on your current locale. Generally, the `GROUPLETTERS` | |
74 | option is not needed with `LOCALE` because the `locale` library | |
75 | groups the letters in the same manner (although you may still | |
76 | need `GROUPLETTERS` if there are numbers in your strings). | |
64 | Tell `natsort` to be locale-aware when sorting. This includes both | |
65 | proper sorting of alphabetical characters as well as proper | |
66 | handling of locale-dependent decimal separators and thousands | |
67 | separators. This is a shortcut for | |
68 | ``ns.LOCALEALPHA | ns.LOCALENUM``. | |
69 | Your sorting results will vary depending on your current locale. | |
70 | LOCALEALPHA, LA | |
71 | Tell `natsort` to be locale-aware when sorting, but only for | |
72 | alphabetical characters. | |
73 | LOCALENUM, LN | |
74 | Tell `natsort` to be locale-aware when sorting, but only for | |
75 | decimal separators and thousands separators. | |
77 | 76 | IGNORECASE, IC |
78 | 77 | Tell `natsort` to ignore case when sorting. For example, |
79 | 78 | ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as |
96 | 95 | ``['Banana', 'apple', 'banana', 'Apple']`` would be sorted as |
97 | 96 | ``['Apple', 'apple', 'Banana', 'banana']``. |
98 | 97 | Useless when used with `IGNORECASE`; use with `LOWERCASEFIRST` |
99 | to reverse the order of upper and lower case. | |
98 | to reverse the order of upper and lower case. Generally not | |
99 | needed with `LOCALE`. | |
100 | 100 | CAPITALFIRST, C |
101 | 101 | Only used when `LOCALE` is enabled. Tell `natsort` to put all |
102 | 102 | capitalized words before non-capitalized words. This is essentially |
109 | 109 | treat these as +Infinity and place them after all the other numbers. |
110 | 110 | By default, an NaN will be treated as -Infinity and be placed first. |
111 | 111 | TYPESAFE, T |
112 | Try hard to avoid "unorderable types" error on Python 3. It | |
113 | is the same as setting the old `py3_safe` option to `True`. | |
114 | This is only needed if using ``SIGNED`` or if sorting by | |
115 | ``FLOAT``. You shouldn't need to use this unless you are using | |
116 | ``natsort_keygen``. *NOTE:* It cannot resolve the ``TypeError`` | |
117 | from trying to compare `str` and `bytes`. | |
112 | Deprecated as of `natsort` version 5.0.0; this option is now | |
113 | a no-op because it is always true. | |
114 | VERSION, V | |
115 | Deprecated as of `natsort` version 5.0.0; this option is now | |
116 | a no-op because it is the default. | |
117 | DIGIT, D | |
118 | Same as `VERSION` above. | |
118 | 119 | |
119 | 120 | Notes |
120 | 121 | ----- |
121 | If using `LOCALE`, you may find that if you do not explicitly set | |
122 | the locale your results may not be as you expect... I have found that | |
123 | it depends on the system you are on. To do this is straightforward | |
124 | (in the below example I use 'en_US.UTF-8', but you should use your | |
125 | locale):: | |
122 | If you prefer to use `import natsort as ns` as opposed to | |
123 | `from natsort import natsorted, ns`, the `ns` options are | |
124 | available as top-level imports. | |
126 | 125 | |
127 | >>> import locale | |
128 | >>> # The 'str' call is only to get around a bug on Python 2.x | |
129 | >>> # where 'setlocale' does not expect unicode strings (ironic, | |
130 | >>> # right?) | |
131 | >>> locale.setlocale(locale.LC_ALL, str('en_US.UTF-8')) | |
132 | 'en_US.UTF-8' | |
133 | ||
134 | It is preferred that you do this before importing `natsort`. | |
135 | If you use `PyICU <https://pypi.python.org/pypi/PyICU>`_ (see warning | |
136 | above) then you should not need to do this. | |
126 | >>> import natsort as ns | |
127 | >>> a = ['num5.10', 'num-3', 'num5.3', 'num2'] | |
128 | >>> ns.natsorted(a, alg=ns.REAL) == ns.natsorted(a, alg=ns.ns.REAL) | |
129 | True | |
137 | 130 | |
138 | 131 | """ |
139 | pass | |
132 | # The following were previously options but are now defaults. | |
133 | TYPESAFE = T = 0 | |
134 | INT = I = 0 | |
135 | VERSION = V = 0 | |
136 | DIGIT = D = 0 | |
137 | UNSIGNED = U = 0 | |
140 | 138 | |
139 | # The below are options. The values are stored as powers of two | |
140 | # so bitmasks can be used to extract the user's requested options. | |
141 | FLOAT = F = 1 << 0 | |
142 | SIGNED = S = 1 << 1 | |
143 | REAL = R = FLOAT | SIGNED | |
144 | NOEXP = N = 1 << 2 | |
145 | PATH = P = 1 << 3 | |
146 | LOCALEALPHA = LA = 1 << 4 | |
147 | LOCALENUM = LN = 1 << 5 | |
148 | LOCALE = L = LOCALEALPHA | LOCALENUM | |
149 | IGNORECASE = IC = 1 << 6 | |
150 | LOWERCASEFIRST = LF = 1 << 7 | |
151 | GROUPLETTERS = G = 1 << 8 | |
152 | UNGROUPLETTERS = UG = 1 << 9 | |
153 | CAPITALFIRST = C = UNGROUPLETTERS | |
154 | NANLAST = NL = 1 << 10 | |
155 | COMPATIBILITYNORMALIZE = CN = 1 << 11 | |
156 | NUMAFTER = NA = 1 << 12 | |
141 | 157 | |
142 | # Sort algorithm "enum" values. | |
143 | _ns = { | |
144 | 'INT': 0, 'I': 0, | |
145 | 'FLOAT': 1, 'F': 1, | |
146 | 'UNSIGNED': 0, 'U': 0, | |
147 | 'SIGNED': 2, 'S': 2, | |
148 | 'VERSION': 0, 'V': 0, # Shortcut for INT | UNSIGNED | |
149 | 'DIGIT': 0, 'D': 0, # Shortcut for INT | UNSIGNED | |
150 | 'REAL': 3, 'R': 3, # Shortcut for FLOAT | SIGNED | |
151 | 'NOEXP': 4, 'N': 4, | |
152 | 'PATH': 8, 'P': 8, | |
153 | 'LOCALE': 16, 'L': 16, | |
154 | 'IGNORECASE': 32, 'IC': 32, | |
155 | 'LOWERCASEFIRST': 64, 'LF': 64, | |
156 | 'GROUPLETTERS': 128, 'G': 128, | |
157 | 'UNGROUPLETTERS': 256, 'UG': 256, | |
158 | 'CAPITALFIRST': 256, 'C': 256, | |
159 | 'NANLAST': 512, 'NL': 512, | |
160 | 'TYPESAFE': 2048, 'T': 2048, | |
161 | } | |
162 | # Populate the ns class with the _ns values. | |
163 | for x, y in _ns.items(): | |
164 | setattr(ns, x, y) | |
158 | # The below are private options for internal use only. | |
159 | _NUMERIC_ONLY = REAL | NOEXP | |
160 | _DUMB = 1 << 31 |
18 | 18 | # Rather than determine this on the fly, which would incur a startup |
19 | 19 | # runtime penalty, the hex values of the Unicode numeric characters |
20 | 20 | # are hard-coded below. |
21 | numeric_hex = [ | |
22 | 0XB2, 0XB3, 0XB9, 0XBC, 0XBD, 0XBE, 0X660, 0X661, 0X662, 0X663, 0X664, | |
23 | 0X665, 0X666, 0X667, 0X668, 0X669, 0X6F0, 0X6F1, 0X6F2, 0X6F3, 0X6F4, | |
24 | 0X6F5, 0X6F6, 0X6F7, 0X6F8, 0X6F9, 0X7C0, 0X7C1, 0X7C2, 0X7C3, 0X7C4, | |
25 | 0X7C5, 0X7C6, 0X7C7, 0X7C8, 0X7C9, 0X966, 0X967, 0X968, 0X969, 0X96A, | |
26 | 0X96B, 0X96C, 0X96D, 0X96E, 0X96F, 0X9E6, 0X9E7, 0X9E8, 0X9E9, 0X9EA, | |
27 | 0X9EB, 0X9EC, 0X9ED, 0X9EE, 0X9EF, 0X9F4, 0X9F5, 0X9F6, 0X9F7, 0X9F8, | |
28 | 0X9F9, 0XA66, 0XA67, 0XA68, 0XA69, 0XA6A, 0XA6B, 0XA6C, 0XA6D, 0XA6E, | |
29 | 0XA6F, 0XAE6, 0XAE7, 0XAE8, 0XAE9, 0XAEA, 0XAEB, 0XAEC, 0XAED, 0XAEE, | |
30 | 0XAEF, 0XB66, 0XB67, 0XB68, 0XB69, 0XB6A, 0XB6B, 0XB6C, 0XB6D, 0XB6E, | |
31 | 0XB6F, 0XB72, 0XB73, 0XB74, 0XB75, 0XB76, 0XB77, 0XBE6, 0XBE7, 0XBE8, | |
32 | 0XBE9, 0XBEA, 0XBEB, 0XBEC, 0XBED, 0XBEE, 0XBEF, 0XBF0, 0XBF1, 0XBF2, | |
33 | 0XC66, 0XC67, 0XC68, 0XC69, 0XC6A, 0XC6B, 0XC6C, 0XC6D, 0XC6E, 0XC6F, | |
34 | 0XC78, 0XC79, 0XC7A, 0XC7B, 0XC7C, 0XC7D, 0XC7E, 0XCE6, 0XCE7, 0XCE8, | |
35 | 0XCE9, 0XCEA, 0XCEB, 0XCEC, 0XCED, 0XCEE, 0XCEF, 0XD66, 0XD67, 0XD68, | |
36 | 0XD69, 0XD6A, 0XD6B, 0XD6C, 0XD6D, 0XD6E, 0XD6F, 0XD70, 0XD71, 0XD72, | |
37 | 0XD73, 0XD74, 0XD75, 0XE50, 0XE51, 0XE52, 0XE53, 0XE54, 0XE55, 0XE56, | |
38 | 0XE57, 0XE58, 0XE59, 0XED0, 0XED1, 0XED2, 0XED3, 0XED4, 0XED5, 0XED6, | |
39 | 0XED7, 0XED8, 0XED9, 0XF20, 0XF21, 0XF22, 0XF23, 0XF24, 0XF25, 0XF26, | |
40 | 0XF27, 0XF28, 0XF29, 0XF2A, 0XF2B, 0XF2C, 0XF2D, 0XF2E, 0XF2F, 0XF30, | |
41 | 0XF31, 0XF32, 0XF33, 0X1040, 0X1041, 0X1042, 0X1043, 0X1044, 0X1045, | |
42 | 0X1046, 0X1047, 0X1048, 0X1049, 0X1090, 0X1091, 0X1092, 0X1093, 0X1094, | |
43 | 0X1095, 0X1096, 0X1097, 0X1098, 0X1099, 0X1369, 0X136A, 0X136B, 0X136C, | |
44 | 0X136D, 0X136E, 0X136F, 0X1370, 0X1371, 0X1372, 0X1373, 0X1374, 0X1375, | |
45 | 0X1376, 0X1377, 0X1378, 0X1379, 0X137A, 0X137B, 0X137C, 0X16EE, 0X16EF, | |
46 | 0X16F0, 0X17E0, 0X17E1, 0X17E2, 0X17E3, 0X17E4, 0X17E5, 0X17E6, 0X17E7, | |
47 | 0X17E8, 0X17E9, 0X17F0, 0X17F1, 0X17F2, 0X17F3, 0X17F4, 0X17F5, 0X17F6, | |
48 | 0X17F7, 0X17F8, 0X17F9, 0X1810, 0X1811, 0X1812, 0X1813, 0X1814, 0X1815, | |
49 | 0X1816, 0X1817, 0X1818, 0X1819, 0X1946, 0X1947, 0X1948, 0X1949, 0X194A, | |
50 | 0X194B, 0X194C, 0X194D, 0X194E, 0X194F, 0X19D0, 0X19D1, 0X19D2, 0X19D3, | |
51 | 0X19D4, 0X19D5, 0X19D6, 0X19D7, 0X19D8, 0X19D9, 0X19DA, 0X1A80, 0X1A81, | |
52 | 0X1A82, 0X1A83, 0X1A84, 0X1A85, 0X1A86, 0X1A87, 0X1A88, 0X1A89, 0X1A90, | |
53 | 0X1A91, 0X1A92, 0X1A93, 0X1A94, 0X1A95, 0X1A96, 0X1A97, 0X1A98, 0X1A99, | |
54 | 0X1B50, 0X1B51, 0X1B52, 0X1B53, 0X1B54, 0X1B55, 0X1B56, 0X1B57, 0X1B58, | |
55 | 0X1B59, 0X1BB0, 0X1BB1, 0X1BB2, 0X1BB3, 0X1BB4, 0X1BB5, 0X1BB6, 0X1BB7, | |
56 | 0X1BB8, 0X1BB9, 0X1C40, 0X1C41, 0X1C42, 0X1C43, 0X1C44, 0X1C45, 0X1C46, | |
57 | 0X1C47, 0X1C48, 0X1C49, 0X1C50, 0X1C51, 0X1C52, 0X1C53, 0X1C54, 0X1C55, | |
58 | 0X1C56, 0X1C57, 0X1C58, 0X1C59, 0X2070, 0X2074, 0X2075, 0X2076, 0X2077, | |
59 | 0X2078, 0X2079, 0X2080, 0X2081, 0X2082, 0X2083, 0X2084, 0X2085, 0X2086, | |
60 | 0X2087, 0X2088, 0X2089, 0X2150, 0X2151, 0X2152, 0X2153, 0X2154, 0X2155, | |
61 | 0X2156, 0X2157, 0X2158, 0X2159, 0X215A, 0X215B, 0X215C, 0X215D, 0X215E, | |
62 | 0X215F, 0X2160, 0X2161, 0X2162, 0X2163, 0X2164, 0X2165, 0X2166, 0X2167, | |
63 | 0X2168, 0X2169, 0X216A, 0X216B, 0X216C, 0X216D, 0X216E, 0X216F, 0X2170, | |
64 | 0X2171, 0X2172, 0X2173, 0X2174, 0X2175, 0X2176, 0X2177, 0X2178, 0X2179, | |
65 | 0X217A, 0X217B, 0X217C, 0X217D, 0X217E, 0X217F, 0X2180, 0X2181, 0X2182, | |
66 | 0X2185, 0X2186, 0X2187, 0X2188, 0X2189, 0X2460, 0X2461, 0X2462, 0X2463, | |
67 | 0X2464, 0X2465, 0X2466, 0X2467, 0X2468, 0X2469, 0X246A, 0X246B, 0X246C, | |
68 | 0X246D, 0X246E, 0X246F, 0X2470, 0X2471, 0X2472, 0X2473, 0X2474, 0X2475, | |
69 | 0X2476, 0X2477, 0X2478, 0X2479, 0X247A, 0X247B, 0X247C, 0X247D, 0X247E, | |
70 | 0X247F, 0X2480, 0X2481, 0X2482, 0X2483, 0X2484, 0X2485, 0X2486, 0X2487, | |
71 | 0X2488, 0X2489, 0X248A, 0X248B, 0X248C, 0X248D, 0X248E, 0X248F, 0X2490, | |
72 | 0X2491, 0X2492, 0X2493, 0X2494, 0X2495, 0X2496, 0X2497, 0X2498, 0X2499, | |
73 | 0X249A, 0X249B, 0X24EA, 0X24EB, 0X24EC, 0X24ED, 0X24EE, 0X24EF, 0X24F0, | |
74 | 0X24F1, 0X24F2, 0X24F3, 0X24F4, 0X24F5, 0X24F6, 0X24F7, 0X24F8, 0X24F9, | |
75 | 0X24FA, 0X24FB, 0X24FC, 0X24FD, 0X24FE, 0X24FF, 0X2776, 0X2777, 0X2778, | |
76 | 0X2779, 0X277A, 0X277B, 0X277C, 0X277D, 0X277E, 0X277F, 0X2780, 0X2781, | |
77 | 0X2782, 0X2783, 0X2784, 0X2785, 0X2786, 0X2787, 0X2788, 0X2789, 0X278A, | |
78 | 0X278B, 0X278C, 0X278D, 0X278E, 0X278F, 0X2790, 0X2791, 0X2792, 0X2793, | |
79 | 0X2CFD, 0X3007, 0X3021, 0X3022, 0X3023, 0X3024, 0X3025, 0X3026, 0X3027, | |
80 | 0X3028, 0X3029, 0X3038, 0X3039, 0X303A, 0X3192, 0X3193, 0X3194, 0X3195, | |
81 | 0X3220, 0X3221, 0X3222, 0X3223, 0X3224, 0X3225, 0X3226, 0X3227, 0X3228, | |
82 | 0X3229, 0X3248, 0X3249, 0X324A, 0X324B, 0X324C, 0X324D, 0X324E, 0X324F, | |
83 | 0X3251, 0X3252, 0X3253, 0X3254, 0X3255, 0X3256, 0X3257, 0X3258, 0X3259, | |
84 | 0X325A, 0X325B, 0X325C, 0X325D, 0X325E, 0X325F, 0X3280, 0X3281, 0X3282, | |
85 | 0X3283, 0X3284, 0X3285, 0X3286, 0X3287, 0X3288, 0X3289, 0X32B1, 0X32B2, | |
86 | 0X32B3, 0X32B4, 0X32B5, 0X32B6, 0X32B7, 0X32B8, 0X32B9, 0X32BA, 0X32BB, | |
87 | 0X32BC, 0X32BD, 0X32BE, 0X32BF, 0X3405, 0X3483, 0X382A, 0X3B4D, 0X4E00, | |
88 | 0X4E03, 0X4E07, 0X4E09, 0X4E5D, 0X4E8C, 0X4E94, 0X4E96, 0X4EBF, 0X4EC0, | |
89 | 0X4EDF, 0X4EE8, 0X4F0D, 0X4F70, 0X5104, 0X5146, 0X5169, 0X516B, 0X516D, | |
90 | 0X5341, 0X5343, 0X5344, 0X5345, 0X534C, 0X53C1, 0X53C2, 0X53C3, 0X53C4, | |
91 | 0X56DB, 0X58F1, 0X58F9, 0X5E7A, 0X5EFE, 0X5EFF, 0X5F0C, 0X5F0D, 0X5F0E, | |
92 | 0X5F10, 0X62FE, 0X634C, 0X67D2, 0X6F06, 0X7396, 0X767E, 0X8086, 0X842C, | |
93 | 0X8CAE, 0X8CB3, 0X8D30, 0X9621, 0X9646, 0X964C, 0X9678, 0X96F6, 0XA620, | |
94 | 0XA621, 0XA622, 0XA623, 0XA624, 0XA625, 0XA626, 0XA627, 0XA628, 0XA629, | |
95 | 0XA6E6, 0XA6E7, 0XA6E8, 0XA6E9, 0XA6EA, 0XA6EB, 0XA6EC, 0XA6ED, 0XA6EE, | |
96 | 0XA6EF, 0XA830, 0XA831, 0XA832, 0XA833, 0XA834, 0XA835, 0XA8D0, 0XA8D1, | |
97 | 0XA8D2, 0XA8D3, 0XA8D4, 0XA8D5, 0XA8D6, 0XA8D7, 0XA8D8, 0XA8D9, 0XA900, | |
98 | 0XA901, 0XA902, 0XA903, 0XA904, 0XA905, 0XA906, 0XA907, 0XA908, 0XA909, | |
99 | 0XA9D0, 0XA9D1, 0XA9D2, 0XA9D3, 0XA9D4, 0XA9D5, 0XA9D6, 0XA9D7, 0XA9D8, | |
100 | 0XA9D9, 0XAA50, 0XAA51, 0XAA52, 0XAA53, 0XAA54, 0XAA55, 0XAA56, 0XAA57, | |
101 | 0XAA58, 0XAA59, 0XABF0, 0XABF1, 0XABF2, 0XABF3, 0XABF4, 0XABF5, 0XABF6, | |
102 | 0XABF7, 0XABF8, 0XABF9, 0XF96B, 0XF973, 0XF978, 0XF9B2, 0XF9D1, 0XF9D3, | |
103 | 0XF9FD, 0XFF10, 0XFF11, 0XFF12, 0XFF13, 0XFF14, 0XFF15, 0XFF16, 0XFF17, | |
104 | 0XFF18, 0XFF19, 0X10107, 0X10108, 0X10109, 0X1010A, 0X1010B, 0X1010C, | |
105 | 0X1010D, 0X1010E, 0X1010F, 0X10110, 0X10111, 0X10112, 0X10113, 0X10114, | |
106 | 0X10115, 0X10116, 0X10117, 0X10118, 0X10119, 0X1011A, 0X1011B, 0X1011C, | |
107 | 0X1011D, 0X1011E, 0X1011F, 0X10120, 0X10121, 0X10122, 0X10123, 0X10124, | |
108 | 0X10125, 0X10126, 0X10127, 0X10128, 0X10129, 0X1012A, 0X1012B, 0X1012C, | |
109 | 0X1012D, 0X1012E, 0X1012F, 0X10130, 0X10131, 0X10132, 0X10133, 0X10140, | |
110 | 0X10141, 0X10142, 0X10143, 0X10144, 0X10145, 0X10146, 0X10147, 0X10148, | |
111 | 0X10149, 0X1014A, 0X1014B, 0X1014C, 0X1014D, 0X1014E, 0X1014F, 0X10150, | |
112 | 0X10151, 0X10152, 0X10153, 0X10154, 0X10155, 0X10156, 0X10157, 0X10158, | |
113 | 0X10159, 0X1015A, 0X1015B, 0X1015C, 0X1015D, 0X1015E, 0X1015F, 0X10160, | |
114 | 0X10161, 0X10162, 0X10163, 0X10164, 0X10165, 0X10166, 0X10167, 0X10168, | |
115 | 0X10169, 0X1016A, 0X1016B, 0X1016C, 0X1016D, 0X1016E, 0X1016F, 0X10170, | |
116 | 0X10171, 0X10172, 0X10173, 0X10174, 0X10175, 0X10176, 0X10177, 0X10178, | |
117 | 0X1018A, 0X10320, 0X10321, 0X10322, 0X10323, 0X10341, 0X1034A, 0X103D1, | |
118 | 0X103D2, 0X103D3, 0X103D4, 0X103D5, 0X104A0, 0X104A1, 0X104A2, 0X104A3, | |
119 | 0X104A4, 0X104A5, 0X104A6, 0X104A7, 0X104A8, 0X104A9, 0X10858, 0X10859, | |
120 | 0X1085A, 0X1085B, 0X1085C, 0X1085D, 0X1085E, 0X1085F, 0X10916, 0X10917, | |
121 | 0X10918, 0X10919, 0X1091A, 0X1091B, 0X10A40, 0X10A41, 0X10A42, 0X10A43, | |
122 | 0X10A44, 0X10A45, 0X10A46, 0X10A47, 0X10A7D, 0X10A7E, 0X10B58, 0X10B59, | |
123 | 0X10B5A, 0X10B5B, 0X10B5C, 0X10B5D, 0X10B5E, 0X10B5F, 0X10B78, 0X10B79, | |
124 | 0X10B7A, 0X10B7B, 0X10B7C, 0X10B7D, 0X10B7E, 0X10B7F, 0X10E60, 0X10E61, | |
125 | 0X10E62, 0X10E63, 0X10E64, 0X10E65, 0X10E66, 0X10E67, 0X10E68, 0X10E69, | |
126 | 0X10E6A, 0X10E6B, 0X10E6C, 0X10E6D, 0X10E6E, 0X10E6F, 0X10E70, 0X10E71, | |
127 | 0X10E72, 0X10E73, 0X10E74, 0X10E75, 0X10E76, 0X10E77, 0X10E78, 0X10E79, | |
128 | 0X10E7A, 0X10E7B, 0X10E7C, 0X10E7D, 0X10E7E, 0X11052, 0X11053, 0X11054, | |
129 | 0X11055, 0X11056, 0X11057, 0X11058, 0X11059, 0X1105A, 0X1105B, 0X1105C, | |
130 | 0X1105D, 0X1105E, 0X1105F, 0X11060, 0X11061, 0X11062, 0X11063, 0X11064, | |
131 | 0X11065, 0X11066, 0X11067, 0X11068, 0X11069, 0X1106A, 0X1106B, 0X1106C, | |
132 | 0X1106D, 0X1106E, 0X1106F, 0X110F0, 0X110F1, 0X110F2, 0X110F3, 0X110F4, | |
133 | 0X110F5, 0X110F6, 0X110F7, 0X110F8, 0X110F9, 0X11136, 0X11137, 0X11138, | |
134 | 0X11139, 0X1113A, 0X1113B, 0X1113C, 0X1113D, 0X1113E, 0X1113F, 0X111D0, | |
135 | 0X111D1, 0X111D2, 0X111D3, 0X111D4, 0X111D5, 0X111D6, 0X111D7, 0X111D8, | |
136 | 0X111D9, 0X116C0, 0X116C1, 0X116C2, 0X116C3, 0X116C4, 0X116C5, 0X116C6, | |
137 | 0X116C7, 0X116C8, 0X116C9, 0X12400, 0X12401, 0X12402, 0X12403, 0X12404, | |
138 | 0X12405, 0X12406, 0X12407, 0X12408, 0X12409, 0X1240A, 0X1240B, 0X1240C, | |
139 | 0X1240D, 0X1240E, 0X1240F, 0X12410, 0X12411, 0X12412, 0X12413, 0X12414, | |
140 | 0X12415, 0X12416, 0X12417, 0X12418, 0X12419, 0X1241A, 0X1241B, 0X1241C, | |
141 | 0X1241D, 0X1241E, 0X1241F, 0X12420, 0X12421, 0X12422, 0X12423, 0X12424, | |
142 | 0X12425, 0X12426, 0X12427, 0X12428, 0X12429, 0X1242A, 0X1242B, 0X1242C, | |
143 | 0X1242D, 0X1242E, 0X1242F, 0X12430, 0X12431, 0X12432, 0X12433, 0X12434, | |
144 | 0X12435, 0X12436, 0X12437, 0X12438, 0X12439, 0X1243A, 0X1243B, 0X1243C, | |
145 | 0X1243D, 0X1243E, 0X1243F, 0X12440, 0X12441, 0X12442, 0X12443, 0X12444, | |
146 | 0X12445, 0X12446, 0X12447, 0X12448, 0X12449, 0X1244A, 0X1244B, 0X1244C, | |
147 | 0X1244D, 0X1244E, 0X1244F, 0X12450, 0X12451, 0X12452, 0X12453, 0X12454, | |
148 | 0X12455, 0X12456, 0X12457, 0X12458, 0X12459, 0X1245A, 0X1245B, 0X1245C, | |
149 | 0X1245D, 0X1245E, 0X1245F, 0X12460, 0X12461, 0X12462, 0X1D360, 0X1D361, | |
150 | 0X1D362, 0X1D363, 0X1D364, 0X1D365, 0X1D366, 0X1D367, 0X1D368, 0X1D369, | |
151 | 0X1D36A, 0X1D36B, 0X1D36C, 0X1D36D, 0X1D36E, 0X1D36F, 0X1D370, 0X1D371, | |
152 | 0X1D7CE, 0X1D7CF, 0X1D7D0, 0X1D7D1, 0X1D7D2, 0X1D7D3, 0X1D7D4, 0X1D7D5, | |
153 | 0X1D7D6, 0X1D7D7, 0X1D7D8, 0X1D7D9, 0X1D7DA, 0X1D7DB, 0X1D7DC, 0X1D7DD, | |
154 | 0X1D7DE, 0X1D7DF, 0X1D7E0, 0X1D7E1, 0X1D7E2, 0X1D7E3, 0X1D7E4, 0X1D7E5, | |
155 | 0X1D7E6, 0X1D7E7, 0X1D7E8, 0X1D7E9, 0X1D7EA, 0X1D7EB, 0X1D7EC, 0X1D7ED, | |
156 | 0X1D7EE, 0X1D7EF, 0X1D7F0, 0X1D7F1, 0X1D7F2, 0X1D7F3, 0X1D7F4, 0X1D7F5, | |
157 | 0X1D7F6, 0X1D7F7, 0X1D7F8, 0X1D7F9, 0X1D7FA, 0X1D7FB, 0X1D7FC, 0X1D7FD, | |
158 | 0X1D7FE, 0X1D7FF, 0X1F100, 0X1F101, 0X1F102, 0X1F103, 0X1F104, 0X1F105, | |
159 | 0X1F106, 0X1F107, 0X1F108, 0X1F109, 0X1F10A, 0X20001, 0X20064, 0X200E2, | |
160 | 0X20121, 0X2092A, 0X20983, 0X2098C, 0X2099C, 0X20AEA, 0X20AFD, 0X20B19, | |
161 | 0X22390, 0X22998, 0X23B1B, 0X2626D, 0X2F890, | |
162 | ] | |
21 | numeric_hex = ( | |
22 | 0XB2, 0XB3, 0XB9, 0XBC, 0XBD, 0XBE, 0X660, 0X661, 0X662, | |
23 | 0X663, 0X664, 0X665, 0X666, 0X667, 0X668, 0X669, 0X6F0, | |
24 | 0X6F1, 0X6F2, 0X6F3, 0X6F4, 0X6F5, 0X6F6, 0X6F7, 0X6F8, | |
25 | 0X6F9, 0X7C0, 0X7C1, 0X7C2, 0X7C3, 0X7C4, 0X7C5, 0X7C6, | |
26 | 0X7C7, 0X7C8, 0X7C9, 0X966, 0X967, 0X968, 0X969, 0X96A, | |
27 | 0X96B, 0X96C, 0X96D, 0X96E, 0X96F, 0X9E6, 0X9E7, 0X9E8, | |
28 | 0X9E9, 0X9EA, 0X9EB, 0X9EC, 0X9ED, 0X9EE, 0X9EF, 0X9F4, | |
29 | 0X9F5, 0X9F6, 0X9F7, 0X9F8, 0X9F9, 0XA66, 0XA67, 0XA68, | |
30 | 0XA69, 0XA6A, 0XA6B, 0XA6C, 0XA6D, 0XA6E, 0XA6F, 0XAE6, | |
31 | 0XAE7, 0XAE8, 0XAE9, 0XAEA, 0XAEB, 0XAEC, 0XAED, 0XAEE, | |
32 | 0XAEF, 0XB66, 0XB67, 0XB68, 0XB69, 0XB6A, 0XB6B, 0XB6C, | |
33 | 0XB6D, 0XB6E, 0XB6F, 0XB72, 0XB73, 0XB74, 0XB75, 0XB76, | |
34 | 0XB77, 0XBE6, 0XBE7, 0XBE8, 0XBE9, 0XBEA, 0XBEB, 0XBEC, | |
35 | 0XBED, 0XBEE, 0XBEF, 0XBF0, 0XBF1, 0XBF2, 0XC66, 0XC67, | |
36 | 0XC68, 0XC69, 0XC6A, 0XC6B, 0XC6C, 0XC6D, 0XC6E, 0XC6F, | |
37 | 0XC78, 0XC79, 0XC7A, 0XC7B, 0XC7C, 0XC7D, 0XC7E, 0XCE6, | |
38 | 0XCE7, 0XCE8, 0XCE9, 0XCEA, 0XCEB, 0XCEC, 0XCED, 0XCEE, | |
39 | 0XCEF, 0XD58, 0XD59, 0XD5A, 0XD5B, 0XD5C, 0XD5D, 0XD5E, | |
40 | 0XD66, 0XD67, 0XD68, 0XD69, 0XD6A, 0XD6B, 0XD6C, 0XD6D, | |
41 | 0XD6E, 0XD6F, 0XD70, 0XD71, 0XD72, 0XD73, 0XD74, 0XD75, | |
42 | 0XD76, 0XD77, 0XD78, 0XDE6, 0XDE7, 0XDE8, 0XDE9, 0XDEA, | |
43 | 0XDEB, 0XDEC, 0XDED, 0XDEE, 0XDEF, 0XE50, 0XE51, 0XE52, | |
44 | 0XE53, 0XE54, 0XE55, 0XE56, 0XE57, 0XE58, 0XE59, 0XED0, | |
45 | 0XED1, 0XED2, 0XED3, 0XED4, 0XED5, 0XED6, 0XED7, 0XED8, | |
46 | 0XED9, 0XF20, 0XF21, 0XF22, 0XF23, 0XF24, 0XF25, 0XF26, | |
47 | 0XF27, 0XF28, 0XF29, 0XF2A, 0XF2B, 0XF2C, 0XF2D, 0XF2E, | |
48 | 0XF2F, 0XF30, 0XF31, 0XF32, 0XF33, 0X1040, 0X1041, 0X1042, | |
49 | 0X1043, 0X1044, 0X1045, 0X1046, 0X1047, 0X1048, 0X1049, | |
50 | 0X1090, 0X1091, 0X1092, 0X1093, 0X1094, 0X1095, 0X1096, | |
51 | 0X1097, 0X1098, 0X1099, 0X1369, 0X136A, 0X136B, 0X136C, | |
52 | 0X136D, 0X136E, 0X136F, 0X1370, 0X1371, 0X1372, 0X1373, | |
53 | 0X1374, 0X1375, 0X1376, 0X1377, 0X1378, 0X1379, 0X137A, | |
54 | 0X137B, 0X137C, 0X16EE, 0X16EF, 0X16F0, 0X17E0, 0X17E1, | |
55 | 0X17E2, 0X17E3, 0X17E4, 0X17E5, 0X17E6, 0X17E7, 0X17E8, | |
56 | 0X17E9, 0X17F0, 0X17F1, 0X17F2, 0X17F3, 0X17F4, 0X17F5, | |
57 | 0X17F6, 0X17F7, 0X17F8, 0X17F9, 0X1810, 0X1811, 0X1812, | |
58 | 0X1813, 0X1814, 0X1815, 0X1816, 0X1817, 0X1818, 0X1819, | |
59 | 0X1946, 0X1947, 0X1948, 0X1949, 0X194A, 0X194B, 0X194C, | |
60 | 0X194D, 0X194E, 0X194F, 0X19D0, 0X19D1, 0X19D2, 0X19D3, | |
61 | 0X19D4, 0X19D5, 0X19D6, 0X19D7, 0X19D8, 0X19D9, 0X19DA, | |
62 | 0X1A80, 0X1A81, 0X1A82, 0X1A83, 0X1A84, 0X1A85, 0X1A86, | |
63 | 0X1A87, 0X1A88, 0X1A89, 0X1A90, 0X1A91, 0X1A92, 0X1A93, | |
64 | 0X1A94, 0X1A95, 0X1A96, 0X1A97, 0X1A98, 0X1A99, 0X1B50, | |
65 | 0X1B51, 0X1B52, 0X1B53, 0X1B54, 0X1B55, 0X1B56, 0X1B57, | |
66 | 0X1B58, 0X1B59, 0X1BB0, 0X1BB1, 0X1BB2, 0X1BB3, 0X1BB4, | |
67 | 0X1BB5, 0X1BB6, 0X1BB7, 0X1BB8, 0X1BB9, 0X1C40, 0X1C41, | |
68 | 0X1C42, 0X1C43, 0X1C44, 0X1C45, 0X1C46, 0X1C47, 0X1C48, | |
69 | 0X1C49, 0X1C50, 0X1C51, 0X1C52, 0X1C53, 0X1C54, 0X1C55, | |
70 | 0X1C56, 0X1C57, 0X1C58, 0X1C59, 0X2070, 0X2074, 0X2075, | |
71 | 0X2076, 0X2077, 0X2078, 0X2079, 0X2080, 0X2081, 0X2082, | |
72 | 0X2083, 0X2084, 0X2085, 0X2086, 0X2087, 0X2088, 0X2089, | |
73 | 0X2150, 0X2151, 0X2152, 0X2153, 0X2154, 0X2155, 0X2156, | |
74 | 0X2157, 0X2158, 0X2159, 0X215A, 0X215B, 0X215C, 0X215D, | |
75 | 0X215E, 0X215F, 0X2160, 0X2161, 0X2162, 0X2163, 0X2164, | |
76 | 0X2165, 0X2166, 0X2167, 0X2168, 0X2169, 0X216A, 0X216B, | |
77 | 0X216C, 0X216D, 0X216E, 0X216F, 0X2170, 0X2171, 0X2172, | |
78 | 0X2173, 0X2174, 0X2175, 0X2176, 0X2177, 0X2178, 0X2179, | |
79 | 0X217A, 0X217B, 0X217C, 0X217D, 0X217E, 0X217F, 0X2180, | |
80 | 0X2181, 0X2182, 0X2185, 0X2186, 0X2187, 0X2188, 0X2189, | |
81 | 0X2460, 0X2461, 0X2462, 0X2463, 0X2464, 0X2465, 0X2466, | |
82 | 0X2467, 0X2468, 0X2469, 0X246A, 0X246B, 0X246C, 0X246D, | |
83 | 0X246E, 0X246F, 0X2470, 0X2471, 0X2472, 0X2473, 0X2474, | |
84 | 0X2475, 0X2476, 0X2477, 0X2478, 0X2479, 0X247A, 0X247B, | |
85 | 0X247C, 0X247D, 0X247E, 0X247F, 0X2480, 0X2481, 0X2482, | |
86 | 0X2483, 0X2484, 0X2485, 0X2486, 0X2487, 0X2488, 0X2489, | |
87 | 0X248A, 0X248B, 0X248C, 0X248D, 0X248E, 0X248F, 0X2490, | |
88 | 0X2491, 0X2492, 0X2493, 0X2494, 0X2495, 0X2496, 0X2497, | |
89 | 0X2498, 0X2499, 0X249A, 0X249B, 0X24EA, 0X24EB, 0X24EC, | |
90 | 0X24ED, 0X24EE, 0X24EF, 0X24F0, 0X24F1, 0X24F2, 0X24F3, | |
91 | 0X24F4, 0X24F5, 0X24F6, 0X24F7, 0X24F8, 0X24F9, 0X24FA, | |
92 | 0X24FB, 0X24FC, 0X24FD, 0X24FE, 0X24FF, 0X2776, 0X2777, | |
93 | 0X2778, 0X2779, 0X277A, 0X277B, 0X277C, 0X277D, 0X277E, | |
94 | 0X277F, 0X2780, 0X2781, 0X2782, 0X2783, 0X2784, 0X2785, | |
95 | 0X2786, 0X2787, 0X2788, 0X2789, 0X278A, 0X278B, 0X278C, | |
96 | 0X278D, 0X278E, 0X278F, 0X2790, 0X2791, 0X2792, 0X2793, | |
97 | 0X2CFD, 0X3007, 0X3021, 0X3022, 0X3023, 0X3024, 0X3025, | |
98 | 0X3026, 0X3027, 0X3028, 0X3029, 0X3038, 0X3039, 0X303A, | |
99 | 0X3192, 0X3193, 0X3194, 0X3195, 0X3220, 0X3221, 0X3222, | |
100 | 0X3223, 0X3224, 0X3225, 0X3226, 0X3227, 0X3228, 0X3229, | |
101 | 0X3248, 0X3249, 0X324A, 0X324B, 0X324C, 0X324D, 0X324E, | |
102 | 0X324F, 0X3251, 0X3252, 0X3253, 0X3254, 0X3255, 0X3256, | |
103 | 0X3257, 0X3258, 0X3259, 0X325A, 0X325B, 0X325C, 0X325D, | |
104 | 0X325E, 0X325F, 0X3280, 0X3281, 0X3282, 0X3283, 0X3284, | |
105 | 0X3285, 0X3286, 0X3287, 0X3288, 0X3289, 0X32B1, 0X32B2, | |
106 | 0X32B3, 0X32B4, 0X32B5, 0X32B6, 0X32B7, 0X32B8, 0X32B9, | |
107 | 0X32BA, 0X32BB, 0X32BC, 0X32BD, 0X32BE, 0X32BF, 0X3405, | |
108 | 0X3483, 0X382A, 0X3B4D, 0X4E00, 0X4E03, 0X4E07, 0X4E09, | |
109 | 0X4E5D, 0X4E8C, 0X4E94, 0X4E96, 0X4EBF, 0X4EC0, 0X4EDF, | |
110 | 0X4EE8, 0X4F0D, 0X4F70, 0X5104, 0X5146, 0X5169, 0X516B, | |
111 | 0X516D, 0X5341, 0X5343, 0X5344, 0X5345, 0X534C, 0X53C1, | |
112 | 0X53C2, 0X53C3, 0X53C4, 0X56DB, 0X58F1, 0X58F9, 0X5E7A, | |
113 | 0X5EFE, 0X5EFF, 0X5F0C, 0X5F0D, 0X5F0E, 0X5F10, 0X62FE, | |
114 | 0X634C, 0X67D2, 0X6F06, 0X7396, 0X767E, 0X8086, 0X842C, | |
115 | 0X8CAE, 0X8CB3, 0X8D30, 0X9621, 0X9646, 0X964C, 0X9678, | |
116 | 0X96F6, 0XA620, 0XA621, 0XA622, 0XA623, 0XA624, 0XA625, | |
117 | 0XA626, 0XA627, 0XA628, 0XA629, 0XA6E6, 0XA6E7, 0XA6E8, | |
118 | 0XA6E9, 0XA6EA, 0XA6EB, 0XA6EC, 0XA6ED, 0XA6EE, 0XA6EF, | |
119 | 0XA830, 0XA831, 0XA832, 0XA833, 0XA834, 0XA835, 0XA8D0, | |
120 | 0XA8D1, 0XA8D2, 0XA8D3, 0XA8D4, 0XA8D5, 0XA8D6, 0XA8D7, | |
121 | 0XA8D8, 0XA8D9, 0XA900, 0XA901, 0XA902, 0XA903, 0XA904, | |
122 | 0XA905, 0XA906, 0XA907, 0XA908, 0XA909, 0XA9D0, 0XA9D1, | |
123 | 0XA9D2, 0XA9D3, 0XA9D4, 0XA9D5, 0XA9D6, 0XA9D7, 0XA9D8, | |
124 | 0XA9D9, 0XA9F0, 0XA9F1, 0XA9F2, 0XA9F3, 0XA9F4, 0XA9F5, | |
125 | 0XA9F6, 0XA9F7, 0XA9F8, 0XA9F9, 0XAA50, 0XAA51, 0XAA52, | |
126 | 0XAA53, 0XAA54, 0XAA55, 0XAA56, 0XAA57, 0XAA58, 0XAA59, | |
127 | 0XABF0, 0XABF1, 0XABF2, 0XABF3, 0XABF4, 0XABF5, 0XABF6, | |
128 | 0XABF7, 0XABF8, 0XABF9, 0XF96B, 0XF973, 0XF978, 0XF9B2, | |
129 | 0XF9D1, 0XF9D3, 0XF9FD, 0XFF10, 0XFF11, 0XFF12, 0XFF13, | |
130 | 0XFF14, 0XFF15, 0XFF16, 0XFF17, 0XFF18, 0XFF19, 0X10107, | |
131 | 0X10108, 0X10109, 0X1010A, 0X1010B, 0X1010C, 0X1010D, | |
132 | 0X1010E, 0X1010F, 0X10110, 0X10111, 0X10112, 0X10113, | |
133 | 0X10114, 0X10115, 0X10116, 0X10117, 0X10118, 0X10119, | |
134 | 0X1011A, 0X1011B, 0X1011C, 0X1011D, 0X1011E, 0X1011F, | |
135 | 0X10120, 0X10121, 0X10122, 0X10123, 0X10124, 0X10125, | |
136 | 0X10126, 0X10127, 0X10128, 0X10129, 0X1012A, 0X1012B, | |
137 | 0X1012C, 0X1012D, 0X1012E, 0X1012F, 0X10130, 0X10131, | |
138 | 0X10132, 0X10133, 0X10140, 0X10141, 0X10142, 0X10143, | |
139 | 0X10144, 0X10145, 0X10146, 0X10147, 0X10148, 0X10149, | |
140 | 0X1014A, 0X1014B, 0X1014C, 0X1014D, 0X1014E, 0X1014F, | |
141 | 0X10150, 0X10151, 0X10152, 0X10153, 0X10154, 0X10155, | |
142 | 0X10156, 0X10157, 0X10158, 0X10159, 0X1015A, 0X1015B, | |
143 | 0X1015C, 0X1015D, 0X1015E, 0X1015F, 0X10160, 0X10161, | |
144 | 0X10162, 0X10163, 0X10164, 0X10165, 0X10166, 0X10167, | |
145 | 0X10168, 0X10169, 0X1016A, 0X1016B, 0X1016C, 0X1016D, | |
146 | 0X1016E, 0X1016F, 0X10170, 0X10171, 0X10172, 0X10173, | |
147 | 0X10174, 0X10175, 0X10176, 0X10177, 0X10178, 0X1018A, | |
148 | 0X1018B, 0X102E1, 0X102E2, 0X102E3, 0X102E4, 0X102E5, | |
149 | 0X102E6, 0X102E7, 0X102E8, 0X102E9, 0X102EA, 0X102EB, | |
150 | 0X102EC, 0X102ED, 0X102EE, 0X102EF, 0X102F0, 0X102F1, | |
151 | 0X102F2, 0X102F3, 0X102F4, 0X102F5, 0X102F6, 0X102F7, | |
152 | 0X102F8, 0X102F9, 0X102FA, 0X102FB, 0X10320, 0X10321, | |
153 | 0X10322, 0X10323, 0X10341, 0X1034A, 0X103D1, 0X103D2, | |
154 | 0X103D3, 0X103D4, 0X103D5, 0X104A0, 0X104A1, 0X104A2, | |
155 | 0X104A3, 0X104A4, 0X104A5, 0X104A6, 0X104A7, 0X104A8, | |
156 | 0X104A9, 0X10858, 0X10859, 0X1085A, 0X1085B, 0X1085C, | |
157 | 0X1085D, 0X1085E, 0X1085F, 0X10879, 0X1087A, 0X1087B, | |
158 | 0X1087C, 0X1087D, 0X1087E, 0X1087F, 0X108A7, 0X108A8, | |
159 | 0X108A9, 0X108AA, 0X108AB, 0X108AC, 0X108AD, 0X108AE, | |
160 | 0X108AF, 0X108FB, 0X108FC, 0X108FD, 0X108FE, 0X108FF, | |
161 | 0X10916, 0X10917, 0X10918, 0X10919, 0X1091A, 0X1091B, | |
162 | 0X109BC, 0X109BD, 0X109C0, 0X109C1, 0X109C2, 0X109C3, | |
163 | 0X109C4, 0X109C5, 0X109C6, 0X109C7, 0X109C8, 0X109C9, | |
164 | 0X109CA, 0X109CB, 0X109CC, 0X109CD, 0X109CE, 0X109CF, | |
165 | 0X109D2, 0X109D3, 0X109D4, 0X109D5, 0X109D6, 0X109D7, | |
166 | 0X109D8, 0X109D9, 0X109DA, 0X109DB, 0X109DC, 0X109DD, | |
167 | 0X109DE, 0X109DF, 0X109E0, 0X109E1, 0X109E2, 0X109E3, | |
168 | 0X109E4, 0X109E5, 0X109E6, 0X109E7, 0X109E8, 0X109E9, | |
169 | 0X109EA, 0X109EB, 0X109EC, 0X109ED, 0X109EE, 0X109EF, | |
170 | 0X109F0, 0X109F1, 0X109F2, 0X109F3, 0X109F4, 0X109F5, | |
171 | 0X109F6, 0X109F7, 0X109F8, 0X109F9, 0X109FA, 0X109FB, | |
172 | 0X109FC, 0X109FD, 0X109FE, 0X109FF, 0X10A40, 0X10A41, | |
173 | 0X10A42, 0X10A43, 0X10A44, 0X10A45, 0X10A46, 0X10A47, | |
174 | 0X10A48, 0X10A7D, 0X10A7E, 0X10A9D, 0X10A9E, 0X10A9F, | |
175 | 0X10AEB, 0X10AEC, 0X10AED, 0X10AEE, 0X10AEF, 0X10B58, | |
176 | 0X10B59, 0X10B5A, 0X10B5B, 0X10B5C, 0X10B5D, 0X10B5E, | |
177 | 0X10B5F, 0X10B78, 0X10B79, 0X10B7A, 0X10B7B, 0X10B7C, | |
178 | 0X10B7D, 0X10B7E, 0X10B7F, 0X10BA9, 0X10BAA, 0X10BAB, | |
179 | 0X10BAC, 0X10BAD, 0X10BAE, 0X10BAF, 0X10CFA, 0X10CFB, | |
180 | 0X10CFC, 0X10CFD, 0X10CFE, 0X10CFF, 0X10D30, 0X10D31, | |
181 | 0X10D32, 0X10D33, 0X10D34, 0X10D35, 0X10D36, 0X10D37, | |
182 | 0X10D38, 0X10D39, 0X10E60, 0X10E61, 0X10E62, 0X10E63, | |
183 | 0X10E64, 0X10E65, 0X10E66, 0X10E67, 0X10E68, 0X10E69, | |
184 | 0X10E6A, 0X10E6B, 0X10E6C, 0X10E6D, 0X10E6E, 0X10E6F, | |
185 | 0X10E70, 0X10E71, 0X10E72, 0X10E73, 0X10E74, 0X10E75, | |
186 | 0X10E76, 0X10E77, 0X10E78, 0X10E79, 0X10E7A, 0X10E7B, | |
187 | 0X10E7C, 0X10E7D, 0X10E7E, 0X10F1D, 0X10F1E, 0X10F1F, | |
188 | 0X10F20, 0X10F21, 0X10F22, 0X10F23, 0X10F24, 0X10F25, | |
189 | 0X10F26, 0X10F51, 0X10F52, 0X10F53, 0X10F54, 0X11052, | |
190 | 0X11053, 0X11054, 0X11055, 0X11056, 0X11057, 0X11058, | |
191 | 0X11059, 0X1105A, 0X1105B, 0X1105C, 0X1105D, 0X1105E, | |
192 | 0X1105F, 0X11060, 0X11061, 0X11062, 0X11063, 0X11064, | |
193 | 0X11065, 0X11066, 0X11067, 0X11068, 0X11069, 0X1106A, | |
194 | 0X1106B, 0X1106C, 0X1106D, 0X1106E, 0X1106F, 0X110F0, | |
195 | 0X110F1, 0X110F2, 0X110F3, 0X110F4, 0X110F5, 0X110F6, | |
196 | 0X110F7, 0X110F8, 0X110F9, 0X11136, 0X11137, 0X11138, | |
197 | 0X11139, 0X1113A, 0X1113B, 0X1113C, 0X1113D, 0X1113E, | |
198 | 0X1113F, 0X111D0, 0X111D1, 0X111D2, 0X111D3, 0X111D4, | |
199 | 0X111D5, 0X111D6, 0X111D7, 0X111D8, 0X111D9, 0X111E1, | |
200 | 0X111E2, 0X111E3, 0X111E4, 0X111E5, 0X111E6, 0X111E7, | |
201 | 0X111E8, 0X111E9, 0X111EA, 0X111EB, 0X111EC, 0X111ED, | |
202 | 0X111EE, 0X111EF, 0X111F0, 0X111F1, 0X111F2, 0X111F3, | |
203 | 0X111F4, 0X112F0, 0X112F1, 0X112F2, 0X112F3, 0X112F4, | |
204 | 0X112F5, 0X112F6, 0X112F7, 0X112F8, 0X112F9, 0X11450, | |
205 | 0X11451, 0X11452, 0X11453, 0X11454, 0X11455, 0X11456, | |
206 | 0X11457, 0X11458, 0X11459, 0X114D0, 0X114D1, 0X114D2, | |
207 | 0X114D3, 0X114D4, 0X114D5, 0X114D6, 0X114D7, 0X114D8, | |
208 | 0X114D9, 0X11650, 0X11651, 0X11652, 0X11653, 0X11654, | |
209 | 0X11655, 0X11656, 0X11657, 0X11658, 0X11659, 0X116C0, | |
210 | 0X116C1, 0X116C2, 0X116C3, 0X116C4, 0X116C5, 0X116C6, | |
211 | 0X116C7, 0X116C8, 0X116C9, 0X11730, 0X11731, 0X11732, | |
212 | 0X11733, 0X11734, 0X11735, 0X11736, 0X11737, 0X11738, | |
213 | 0X11739, 0X1173A, 0X1173B, 0X118E0, 0X118E1, 0X118E2, | |
214 | 0X118E3, 0X118E4, 0X118E5, 0X118E6, 0X118E7, 0X118E8, | |
215 | 0X118E9, 0X118EA, 0X118EB, 0X118EC, 0X118ED, 0X118EE, | |
216 | 0X118EF, 0X118F0, 0X118F1, 0X118F2, 0X11C50, 0X11C51, | |
217 | 0X11C52, 0X11C53, 0X11C54, 0X11C55, 0X11C56, 0X11C57, | |
218 | 0X11C58, 0X11C59, 0X11C5A, 0X11C5B, 0X11C5C, 0X11C5D, | |
219 | 0X11C5E, 0X11C5F, 0X11C60, 0X11C61, 0X11C62, 0X11C63, | |
220 | 0X11C64, 0X11C65, 0X11C66, 0X11C67, 0X11C68, 0X11C69, | |
221 | 0X11C6A, 0X11C6B, 0X11C6C, 0X11D50, 0X11D51, 0X11D52, | |
222 | 0X11D53, 0X11D54, 0X11D55, 0X11D56, 0X11D57, 0X11D58, | |
223 | 0X11D59, 0X11DA0, 0X11DA1, 0X11DA2, 0X11DA3, 0X11DA4, | |
224 | 0X11DA5, 0X11DA6, 0X11DA7, 0X11DA8, 0X11DA9, 0X12400, | |
225 | 0X12401, 0X12402, 0X12403, 0X12404, 0X12405, 0X12406, | |
226 | 0X12407, 0X12408, 0X12409, 0X1240A, 0X1240B, 0X1240C, | |
227 | 0X1240D, 0X1240E, 0X1240F, 0X12410, 0X12411, 0X12412, | |
228 | 0X12413, 0X12414, 0X12415, 0X12416, 0X12417, 0X12418, | |
229 | 0X12419, 0X1241A, 0X1241B, 0X1241C, 0X1241D, 0X1241E, | |
230 | 0X1241F, 0X12420, 0X12421, 0X12422, 0X12423, 0X12424, | |
231 | 0X12425, 0X12426, 0X12427, 0X12428, 0X12429, 0X1242A, | |
232 | 0X1242B, 0X1242C, 0X1242D, 0X1242E, 0X1242F, 0X12430, | |
233 | 0X12431, 0X12432, 0X12433, 0X12434, 0X12435, 0X12436, | |
234 | 0X12437, 0X12438, 0X12439, 0X1243A, 0X1243B, 0X1243C, | |
235 | 0X1243D, 0X1243E, 0X1243F, 0X12440, 0X12441, 0X12442, | |
236 | 0X12443, 0X12444, 0X12445, 0X12446, 0X12447, 0X12448, | |
237 | 0X12449, 0X1244A, 0X1244B, 0X1244C, 0X1244D, 0X1244E, | |
238 | 0X1244F, 0X12450, 0X12451, 0X12452, 0X12453, 0X12454, | |
239 | 0X12455, 0X12456, 0X12457, 0X12458, 0X12459, 0X1245A, | |
240 | 0X1245B, 0X1245C, 0X1245D, 0X1245E, 0X1245F, 0X12460, | |
241 | 0X12461, 0X12462, 0X12463, 0X12464, 0X12465, 0X12466, | |
242 | 0X12467, 0X12468, 0X12469, 0X1246A, 0X1246B, 0X1246C, | |
243 | 0X1246D, 0X1246E, 0X16A60, 0X16A61, 0X16A62, 0X16A63, | |
244 | 0X16A64, 0X16A65, 0X16A66, 0X16A67, 0X16A68, 0X16A69, | |
245 | 0X16B50, 0X16B51, 0X16B52, 0X16B53, 0X16B54, 0X16B55, | |
246 | 0X16B56, 0X16B57, 0X16B58, 0X16B59, 0X16B5B, 0X16B5C, | |
247 | 0X16B5D, 0X16B5E, 0X16B5F, 0X16B60, 0X16B61, 0X16E80, | |
248 | 0X16E81, 0X16E82, 0X16E83, 0X16E84, 0X16E85, 0X16E86, | |
249 | 0X16E87, 0X16E88, 0X16E89, 0X16E8A, 0X16E8B, 0X16E8C, | |
250 | 0X16E8D, 0X16E8E, 0X16E8F, 0X16E90, 0X16E91, 0X16E92, | |
251 | 0X16E93, 0X16E94, 0X16E95, 0X16E96, 0X1D2E0, 0X1D2E1, | |
252 | 0X1D2E2, 0X1D2E3, 0X1D2E4, 0X1D2E5, 0X1D2E6, 0X1D2E7, | |
253 | 0X1D2E8, 0X1D2E9, 0X1D2EA, 0X1D2EB, 0X1D2EC, 0X1D2ED, | |
254 | 0X1D2EE, 0X1D2EF, 0X1D2F0, 0X1D2F1, 0X1D2F2, 0X1D2F3, | |
255 | 0X1D360, 0X1D361, 0X1D362, 0X1D363, 0X1D364, 0X1D365, | |
256 | 0X1D366, 0X1D367, 0X1D368, 0X1D369, 0X1D36A, 0X1D36B, | |
257 | 0X1D36C, 0X1D36D, 0X1D36E, 0X1D36F, 0X1D370, 0X1D371, | |
258 | 0X1D372, 0X1D373, 0X1D374, 0X1D375, 0X1D376, 0X1D377, | |
259 | 0X1D378, 0X1D7CE, 0X1D7CF, 0X1D7D0, 0X1D7D1, 0X1D7D2, | |
260 | 0X1D7D3, 0X1D7D4, 0X1D7D5, 0X1D7D6, 0X1D7D7, 0X1D7D8, | |
261 | 0X1D7D9, 0X1D7DA, 0X1D7DB, 0X1D7DC, 0X1D7DD, 0X1D7DE, | |
262 | 0X1D7DF, 0X1D7E0, 0X1D7E1, 0X1D7E2, 0X1D7E3, 0X1D7E4, | |
263 | 0X1D7E5, 0X1D7E6, 0X1D7E7, 0X1D7E8, 0X1D7E9, 0X1D7EA, | |
264 | 0X1D7EB, 0X1D7EC, 0X1D7ED, 0X1D7EE, 0X1D7EF, 0X1D7F0, | |
265 | 0X1D7F1, 0X1D7F2, 0X1D7F3, 0X1D7F4, 0X1D7F5, 0X1D7F6, | |
266 | 0X1D7F7, 0X1D7F8, 0X1D7F9, 0X1D7FA, 0X1D7FB, 0X1D7FC, | |
267 | 0X1D7FD, 0X1D7FE, 0X1D7FF, 0X1E8C7, 0X1E8C8, 0X1E8C9, | |
268 | 0X1E8CA, 0X1E8CB, 0X1E8CC, 0X1E8CD, 0X1E8CE, 0X1E8CF, | |
269 | 0X1E950, 0X1E951, 0X1E952, 0X1E953, 0X1E954, 0X1E955, | |
270 | 0X1E956, 0X1E957, 0X1E958, 0X1E959, 0X1EC71, 0X1EC72, | |
271 | 0X1EC73, 0X1EC74, 0X1EC75, 0X1EC76, 0X1EC77, 0X1EC78, | |
272 | 0X1EC79, 0X1EC7A, 0X1EC7B, 0X1EC7C, 0X1EC7D, 0X1EC7E, | |
273 | 0X1EC7F, 0X1EC80, 0X1EC81, 0X1EC82, 0X1EC83, 0X1EC84, | |
274 | 0X1EC85, 0X1EC86, 0X1EC87, 0X1EC88, 0X1EC89, 0X1EC8A, | |
275 | 0X1EC8B, 0X1EC8C, 0X1EC8D, 0X1EC8E, 0X1EC8F, 0X1EC90, | |
276 | 0X1EC91, 0X1EC92, 0X1EC93, 0X1EC94, 0X1EC95, 0X1EC96, | |
277 | 0X1EC97, 0X1EC98, 0X1EC99, 0X1EC9A, 0X1EC9B, 0X1EC9C, | |
278 | 0X1EC9D, 0X1EC9E, 0X1EC9F, 0X1ECA0, 0X1ECA1, 0X1ECA2, | |
279 | 0X1ECA3, 0X1ECA4, 0X1ECA5, 0X1ECA6, 0X1ECA7, 0X1ECA8, | |
280 | 0X1ECA9, 0X1ECAA, 0X1ECAB, 0X1ECAD, 0X1ECAE, 0X1ECAF, | |
281 | 0X1ECB1, 0X1ECB2, 0X1ECB3, 0X1ECB4, 0X1F100, 0X1F101, | |
282 | 0X1F102, 0X1F103, 0X1F104, 0X1F105, 0X1F106, 0X1F107, | |
283 | 0X1F108, 0X1F109, 0X1F10A, 0X1F10B, 0X1F10C, 0X20001, | |
284 | 0X20064, 0X200E2, 0X20121, 0X2092A, 0X20983, 0X2098C, | |
285 | 0X2099C, 0X20AEA, 0X20AFD, 0X20B19, 0X22390, 0X22998, | |
286 | 0X23B1B, 0X2626D, 0X2F890, | |
287 | ) | |
163 | 288 | |
164 | 289 | # Convert each hex into the literal Unicode character. |
165 | 290 | # Stop if a ValueError is raised in case of a narrow Unicode build. |
169 | 294 | for a in numeric_hex: |
170 | 295 | try: |
171 | 296 | l = py23_unichr(a) |
172 | except ValueError: | |
297 | except ValueError: # pragma: no cover | |
173 | 298 | break |
174 | 299 | if unicodedata.numeric(l, None) is None: |
175 | continue | |
300 | continue # pragma: no cover | |
176 | 301 | numeric_chars.append(l) |
177 | 302 | |
178 | 303 | # The digit characters are a subset of the numerals. |
179 | 304 | digit_chars = [a for a in numeric_chars |
180 | 305 | if unicodedata.digit(a, None) is not None] |
181 | 306 | |
307 | # The decimal characters are a subset of the numerals | |
308 | # (probably of the digits, but let's be safe). | |
309 | decimal_chars = [a for a in numeric_chars | |
310 | if unicodedata.decimal(a, None) is not None] | |
311 | ||
182 | 312 | # Create a single string with the above data. |
313 | decimals = ''.join(decimal_chars) | |
183 | 314 | digits = ''.join(digit_chars) |
184 | 315 | numeric = ''.join(numeric_chars) |
316 | digits_no_decimals = ''.join([x for x in digits if x not in decimals]) | |
317 | numeric_no_decimals = ''.join([x for x in numeric if x not in decimals]) | |
318 | ||
319 | # Some code that can be used to create the above list of hex numbers. | |
if __name__ == '__main__':
    # Helper script: print the hex code points of every character that
    # unicodedata considers numeric (ASCII digits excluded), formatted so
    # the output can be pasted into the tuple of hex numbers above.
    import textwrap
    from natsort.compat.py23 import py23_range

    ascii_digits = set('0123456789')
    hex_chars = []
    for code_point in py23_range(0X110000):
        try:
            char = py23_unichr(code_point)
        except ValueError:
            # Stop at the first code point that cannot be converted.
            break
        if char in ascii_digits:
            continue
        if unicodedata.numeric(char, None) is not None:
            hex_chars.append(code_point)

    hex_string = ', '.join(['0X{:X}'.format(i) for i in hex_chars])
    for line in textwrap.wrap(hex_string, width=60):
        print(' ', line)
1 | 1 | """ |
2 | 2 | Utilities and definitions for natsort, mostly all used to define |
3 | 3 | the _natsort_key function. |
4 | ||
5 | SOME CONVENTIONS USED IN THIS FILE. | |
6 | ||
7 | 1 - Factory Functions | |
8 | ||
9 | Most of the logic of natsort revolves around factory functions | |
10 | that create branchless transformation functions. For example, rather | |
11 | than making a string transformation function that has an if | |
12 | statement to determine whether or not to perform .lower() at | |
13 | runtime for each element to transform, there is a string transformation | |
14 | factory function that will return a function that either calls | |
15 | .lower() or does nothing. In this way, all the branches and | |
16 | decisions are taken care of once, up front. In addition to a slight | |
17 | speed improvement, this provides a more extensible infrastructure. | |
18 | ||
19 | Each of these factory functions will end with the suffix "_factory" | |
20 | to indicate that they themselves return a function. | |
21 | ||
22 | 2 - Keyword Parameters For Local Scope | |
23 | ||
24 | Many of the closures that are created by the factory functions | |
25 | have signatures similar to the following | |
26 | ||
27 | >>> def factory(parameter): | |
28 | ... val = 'yes' if parameter else 'no' | |
29 | ... def closure(x, val=val): | |
30 | ... return '{} {}'.format(val, x) | |
31 | ... return closure | |
32 | ... | |
33 | ||
34 | The variable value is passed as the default to a keyword argument. | |
35 | This is a micro-optimization | |
36 | that ensures "val" is a local variable instead of global variable | |
37 | and thus has a slightly improved performance at runtime. | |
38 | ||
4 | 39 | """ |
5 | 40 | from __future__ import ( |
6 | 41 | print_function, |
11 | 46 | |
12 | 47 | # Std. lib imports. |
13 | 48 | import re |
14 | from math import isnan | |
15 | 49 | from warnings import warn |
16 | from os import curdir, pardir | |
17 | from os.path import split, splitext | |
18 | from itertools import islice | |
19 | from locale import localeconv | |
50 | from os import curdir as os_curdir, pardir as os_pardir | |
51 | from os.path import split as path_split, splitext as path_splitext | |
52 | from itertools import chain as ichain | |
53 | from collections import deque | |
54 | from functools import partial, reduce | |
55 | from operator import methodcaller | |
56 | from unicodedata import normalize | |
20 | 57 | |
21 | 58 | # Local imports. |
22 | from natsort.ns_enum import ns, _ns | |
23 | from natsort.unicode_numbers import digits, numeric | |
24 | from natsort.locale_help import locale_convert, grouper | |
59 | from natsort.ns_enum import ns | |
60 | from natsort.unicode_numbers import numeric_no_decimals, digits_no_decimals | |
25 | 61 | from natsort.compat.pathlib import PurePath, has_pathlib |
62 | from natsort.compat.locale import ( | |
63 | get_strxfrm, | |
64 | get_thousands_sep, | |
65 | get_decimal_point, | |
66 | ) | |
26 | 67 | from natsort.compat.py23 import ( |
27 | 68 | py23_str, |
28 | py23_zip, | |
69 | py23_map, | |
70 | py23_filter, | |
29 | 71 | PY_VERSION, |
30 | ) | |
31 | from natsort.compat.locale import ( | |
32 | dumb_sort, | |
33 | use_pyicu, | |
34 | null_string, | |
72 | NEWPY, | |
35 | 73 | ) |
36 | 74 | from natsort.compat.fastnumbers import ( |
37 | 75 | fast_float, |
38 | 76 | fast_int, |
39 | isint, | |
40 | isfloat, | |
41 | 77 | ) |
42 | ||
43 | # Group algorithm types for easy extraction | |
44 | _NUMBER_ALGORITHMS = ns.FLOAT | ns.INT | ns.UNSIGNED | ns.SIGNED | ns.NOEXP | |
45 | _ALL_BUT_PATH = (ns.F | ns.I | ns.U | ns.S | ns.N | ns.L | | |
46 | ns.IC | ns.LF | ns.G | ns.UG | ns.TYPESAFE) | |
78 | if PY_VERSION >= 3: | |
79 | long = int | |
47 | 80 | |
48 | 81 | # The regex that locates floats - include Unicode numerals. |
49 | _float_sign_exp_re = r'([-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?|[{0}])' | |
50 | _float_sign_exp_re = _float_sign_exp_re.format(numeric) | |
82 | _nnd = numeric_no_decimals | |
83 | _exp = r'(?:[eE][-+]?\d+)?' | |
84 | _num = r'(?:\d+\.?\d*|\.\d+)' | |
85 | _float_sign_exp_re = r'([-+]?{0}{1}|[{2}])' | |
86 | _float_sign_exp_re = _float_sign_exp_re.format(_num, _exp, _nnd) | |
51 | 87 | _float_sign_exp_re = re.compile(_float_sign_exp_re, flags=re.U) |
52 | _float_nosign_exp_re = r'([0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?|[{0}])' | |
53 | _float_nosign_exp_re = _float_nosign_exp_re.format(numeric) | |
88 | _float_nosign_exp_re = r'({0}{1}|[{2}])' | |
89 | _float_nosign_exp_re = _float_nosign_exp_re.format(_num, _exp, _nnd) | |
54 | 90 | _float_nosign_exp_re = re.compile(_float_nosign_exp_re, flags=re.U) |
55 | _float_sign_noexp_re = r'([-+]?[0-9]*\.?[0-9]+|[{0}])' | |
56 | _float_sign_noexp_re = _float_sign_noexp_re.format(numeric) | |
91 | _float_sign_noexp_re = r'([-+]?{0}|[{1}])' | |
92 | _float_sign_noexp_re = _float_sign_noexp_re.format(_num, _nnd) | |
57 | 93 | _float_sign_noexp_re = re.compile(_float_sign_noexp_re, flags=re.U) |
58 | _float_nosign_noexp_re = r'([0-9]*\.?[0-9]+|[{0}])' | |
59 | _float_nosign_noexp_re = _float_nosign_noexp_re.format(numeric) | |
94 | _float_nosign_noexp_re = r'({0}|[{1}])' | |
95 | _float_nosign_noexp_re = _float_nosign_noexp_re.format(_num, _nnd) | |
60 | 96 | _float_nosign_noexp_re = re.compile(_float_nosign_noexp_re, flags=re.U) |
61 | _float_sign_exp_re_c = r'([-+]?[0-9]*[.,]?[0-9]+(?:[eE][-+]?[0-9]+)?)|[{0}]' | |
62 | _float_sign_exp_re_c = _float_sign_exp_re_c.format(numeric) | |
63 | _float_sign_exp_re_c = re.compile(_float_sign_exp_re_c, flags=re.U) | |
64 | _float_nosign_exp_re_c = r'([0-9]*[.,]?[0-9]+(?:[eE][-+]?[0-9]+)?|[{0}])' | |
65 | _float_nosign_exp_re_c = _float_nosign_exp_re_c.format(numeric) | |
66 | _float_nosign_exp_re_c = re.compile(_float_nosign_exp_re_c, flags=re.U) | |
67 | _float_sign_noexp_re_c = r'([-+]?[0-9]*[.,]?[0-9]+|[{0}])' | |
68 | _float_sign_noexp_re_c = _float_sign_noexp_re_c.format(numeric) | |
69 | _float_sign_noexp_re_c = re.compile(_float_sign_noexp_re_c, flags=re.U) | |
70 | _float_nosign_noexp_re_c = r'([0-9]*[.,]?[0-9]+|[{0}])' | |
71 | _float_nosign_noexp_re_c = _float_nosign_noexp_re_c.format(numeric) | |
72 | _float_nosign_noexp_re_c = re.compile(_float_nosign_noexp_re_c, flags=re.U) | |
73 | 97 | |
74 | 98 | # Integer regexes - include Unicode digits. |
75 | _int_nosign_re = r'([0-9]+|[{0}])'.format(digits) | |
99 | _int_nosign_re = r'(\d+|[{0}])'.format(digits_no_decimals) | |
76 | 100 | _int_nosign_re = re.compile(_int_nosign_re, flags=re.U) |
77 | _int_sign_re = r'([-+]?[0-9]+|[{0}])'.format(digits) | |
101 | _int_sign_re = r'([-+]?\d+|[{0}])'.format(digits_no_decimals) | |
78 | 102 | _int_sign_re = re.compile(_int_sign_re, flags=re.U) |
79 | 103 | |
80 | 104 | # This dict will help select the correct regex and number conversion function. |
81 | _regex_and_num_function_chooser = { | |
82 | (ns.F | ns.S, '.'): (_float_sign_exp_re, fast_float), | |
83 | (ns.F | ns.S | ns.N, '.'): (_float_sign_noexp_re, fast_float), | |
84 | (ns.F | ns.U, '.'): (_float_nosign_exp_re, fast_float), | |
85 | (ns.F | ns.U | ns.N, '.'): (_float_nosign_noexp_re, fast_float), | |
86 | (ns.I | ns.S, '.'): (_int_sign_re, fast_int), | |
87 | (ns.I | ns.S | ns.N, '.'): (_int_sign_re, fast_int), | |
88 | (ns.I | ns.U, '.'): (_int_nosign_re, fast_int), | |
89 | (ns.I | ns.U | ns.N, '.'): (_int_nosign_re, fast_int), | |
90 | (ns.F | ns.S, ','): (_float_sign_exp_re_c, fast_float), | |
91 | (ns.F | ns.S | ns.N, ','): (_float_sign_noexp_re_c, fast_float), | |
92 | (ns.F | ns.U, ','): (_float_nosign_exp_re_c, fast_float), | |
93 | (ns.F | ns.U | ns.N, ','): (_float_nosign_noexp_re_c, fast_float), | |
94 | (ns.I | ns.S, ','): (_int_sign_re, fast_int), | |
95 | (ns.I | ns.S | ns.N, ','): (_int_sign_re, fast_int), | |
96 | (ns.I | ns.U, ','): (_int_nosign_re, fast_int), | |
97 | (ns.I | ns.U | ns.N, ','): (_int_nosign_re, fast_int), | |
105 | _regex_chooser = { | |
106 | (ns.F | ns.S): _float_sign_exp_re, | |
107 | (ns.F | ns.S | ns.N): _float_sign_noexp_re, | |
108 | (ns.F | ns.U): _float_nosign_exp_re, | |
109 | (ns.F | ns.U | ns.N): _float_nosign_noexp_re, | |
110 | (ns.I | ns.S): _int_sign_re, | |
111 | (ns.I | ns.S | ns.N): _int_sign_re, | |
112 | (ns.I | ns.U): _int_nosign_re, | |
113 | (ns.I | ns.U | ns.N): _int_nosign_re, | |
98 | 114 | } |
99 | 115 | |
100 | # Dict to select checker function from converter function | |
101 | _conv_to_check = {fast_float: isfloat, fast_int: isint} | |
116 | ||
117 | def _no_op(x): | |
118 | """A function that does nothing.""" | |
119 | return x | |
120 | ||
121 | ||
def _normalize_input_factory(alg):
    """
    Build the function that applies Unicode normalization to the input.

    The normalization form is 'NFKD' when ns.COMPATIBILITYNORMALIZE is
    set in `alg` and 'NFD' otherwise.
    """
    form = 'NFD'
    if alg & ns.COMPATIBILITYNORMALIZE:
        form = 'NFKD'

    if not NEWPY:
        def func(x):
            """Normalize unicode input; return anything else unchanged."""
            return normalize(form, x) if isinstance(x, py23_str) else x
        return func

    return partial(normalize, form)
136 | ||
137 | ||
138 | def _natsort_key(val, key, string_func, bytes_func, num_func): | |
139 | """\ | |
140 | Key to sort strings and numbers naturally. | |
141 | ||
142 | It works by separating out the numbers from the strings. This function for | |
143 | internal use only. See the natsort_keygen documentation for details of each | |
144 | parameter. | |
145 | ||
146 | Parameters | |
147 | ---------- | |
148 | val : str | unicode | |
149 | key : callable | None | |
150 | string_func : callable | |
151 | bytes_func : callable | |
152 | num_func : callable | |
153 | ||
154 | Returns | |
155 | ------- | |
156 | out : tuple | |
157 | The modified value with numbers extracted. | |
158 | ||
159 | """ | |
160 | ||
161 | # Apply key if needed | |
162 | if key is not None: | |
163 | val = key(val) | |
164 | ||
165 | # Assume the input are strings, which is the most common case | |
166 | try: | |
167 | return string_func(val) | |
168 | except (TypeError, AttributeError): | |
169 | ||
170 | # If bytes type, use the bytes_func | |
171 | if type(val) in (bytes,): | |
172 | return bytes_func(val) | |
173 | ||
174 | # Otherwise, assume it is an iterable that must be parses recursively. | |
175 | # Do not apply the key recursively. | |
176 | try: | |
177 | return tuple(_natsort_key( | |
178 | x, None, string_func, bytes_func, num_func | |
179 | ) for x in val) | |
180 | ||
181 | # If that failed, it must be a number. | |
182 | except TypeError: | |
183 | return num_func(val) | |
184 | ||
185 | ||
def _parse_bytes_factory(alg):
    """
    Create a function that wraps a bytes string in a tuple.

    With ns.IGNORECASE the bytes are lowercased; with ns.PATH the result
    is nested inside one additional tuple.
    """
    # ns.UNGROUPLETTERS and ns.LOCALEALPHA are not considered here
    # because bytes cannot be compared to strings.
    as_path = alg & ns.PATH
    fold_case = alg & ns.IGNORECASE

    if as_path:
        if fold_case:
            return lambda x: ((x.lower(),),)
        return lambda x: ((x,),)
    if fold_case:
        return lambda x: (x.lower(),)
    return lambda x: (x,)
198 | ||
199 | ||
def _parse_number_factory(alg, sep, pre_sep):
    """
    Create a function that places a number in a tuple after `sep`.

    NaN is replaced by +infinity when ns.NANLAST is set in `alg` and by
    -infinity otherwise. Extra tuple nesting is added for ns.PATH and for
    the combination of ns.UNGROUPLETTERS and ns.LOCALEALPHA.
    """
    nan_replace = float('+inf') if alg & ns.NANLAST else float('-inf')

    def func(val, nan_replace=nan_replace, sep=sep):
        """Return (sep, number), substituting the replacement for NaN."""
        # NaN is the only value that does not compare equal to itself.
        if val != val:
            return sep, nan_replace
        return sep, val

    as_path = alg & ns.PATH
    if alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA:
        if as_path:
            return lambda x: (((pre_sep,), func(x)),)
        return lambda x: ((pre_sep,), func(x))
    if as_path:
        return lambda x: (func(x),)
    return func
217 | ||
218 | ||
def _parse_string_factory(alg, sep, splitter,
                          input_transform,
                          component_transform,
                          final_transform):
    """
    Create a function that splits a string and formats its components.

    The returned function normalizes its input, applies `input_transform`,
    splits with `splitter`, drops empty components, applies
    `component_transform` to each one, inserts `sep` via _sep_inserter
    and finally hands the result to `final_transform`.
    """
    # The "original" string given to final_transform is the transformed
    # input, except when both ns._DUMB and ns.LOCALEALPHA are set; then
    # it is the (normalized) input before transformation.
    if alg & ns._DUMB and alg & ns.LOCALEALPHA:
        original_func = _no_op
    else:
        original_func = input_transform
    normalize_input = _normalize_input_factory(alg)

    def func(x):
        normalized = normalize_input(x)
        transformed = input_transform(normalized)
        original = original_func(normalized)
        pieces = splitter(transformed)                   # Split the string.
        pieces = py23_filter(None, pieces)               # Drop empty strings.
        pieces = py23_map(component_transform, pieces)   # Per-component.
        pieces = _sep_inserter(pieces, sep)              # Insert separators.
        return final_transform(pieces, original)

    return func
243 | ||
244 | ||
def _parse_path_factory(str_split):
    """
    Create a function that splits a path and parses each component.

    The returned function applies `str_split` to every component produced
    by _path_splitter and collects the results in a tuple.
    """
    def parse_path(x):
        components = _path_splitter(x)
        return tuple(py23_map(str_split, components))

    return parse_path
248 | ||
249 | ||
250 | def _sep_inserter(iterable, sep): | |
251 | """Insert '' between numbers.""" | |
252 | ||
253 | # Get the first element. If StopIteration is raised, that's OK. | |
254 | # Since we are controlling the types of the input, 'type' is used | |
255 | # instead of 'isinstance' for the small speed advantage it offers. | |
256 | try: | |
257 | types = (int, float, long) | |
258 | first = next(iterable) | |
259 | if type(first) in types: | |
260 | yield sep | |
261 | yield first | |
262 | ||
263 | # Now, check if pair of elements are both numbers. If so, add ''. | |
264 | second = next(iterable) | |
265 | if type(first) in types and type(second) in types: | |
266 | yield sep | |
267 | yield second | |
268 | ||
269 | # Now repeat in a loop. | |
270 | for x in iterable: | |
271 | first, second = second, x | |
272 | if type(first) in types and type(second) in types: | |
273 | yield sep | |
274 | yield second | |
275 | except StopIteration: | |
276 | # Catch StopIteration per deprecation in PEP 479: | |
277 | # "Change StopIteration handling inside generators" | |
278 | return | |
279 | ||
280 | ||
def _input_string_transform_factory(alg):
    """
    Given a set of natsort algorithms, return the function to operate
    on the pre-split input string according to the user's request.

    The returned single-argument function applies, in this order and only
    when requested by `alg`: a case swap, case folding, removal of the
    locale's thousands separators, and replacement of the locale's
    decimal point by a period.

    Parameters
    ----------
    alg : ns enum
        The combination of algorithm flags.

    Returns
    -------
    A single argument function that transforms a string.

    """
    # Shortcuts.
    lowfirst = alg & ns.LOWERCASEFIRST
    dumb = alg & ns._DUMB

    # Build the chain of functions to execute in order.
    function_chain = []
    if (dumb and not lowfirst) or (lowfirst and not dumb):
        function_chain.append(methodcaller('swapcase'))

    if alg & ns.IGNORECASE:
        if NEWPY:
            function_chain.append(methodcaller('casefold'))
        else:
            function_chain.append(methodcaller('lower'))

    if alg & ns.LOCALENUM:
        # The separators come from the locale and are embedded in regular
        # expressions, so they must be escaped: an unescaped '.' would
        # match any character, and an unescaped space would be dropped
        # from the pattern by re.VERBOSE.
        decimal = get_decimal_point()
        decimal_re = re.escape(decimal)
        thousands_re = re.escape(get_thousands_sep())

        # Create a regular expression that will remove thousands separators.
        strip_thousands = r'''
            (?<=[0-9]{{1}})  # At least 1 number
            (?<![0-9]{{4}})  # No more than 3 numbers
            {nodecimal}      # Cannot follow decimal
            {thou}           # The thousands separator
            (?=[0-9]{{3}}    # Three numbers must follow
             ([^0-9]|$)      # But a non-number after that
            )
        '''
        nodecimal = r''
        if alg & ns.FLOAT:
            # Make a regular expression component that will ensure no
            # separators are removed after a decimal point.
            nodecimal += r'(?<!' + decimal_re + r'[0-9])'
            nodecimal += r'(?<!' + decimal_re + r'[0-9]{2})'
            nodecimal += r'(?<!' + decimal_re + r'[0-9]{3})'
        strip_thousands = strip_thousands.format(thou=thousands_re,
                                                 nodecimal=nodecimal)
        strip_thousands = re.compile(strip_thousands, flags=re.VERBOSE)
        function_chain.append(partial(strip_thousands.sub, ''))

        # Create a regular expression that will change the decimal point to
        # a period if not already a period.
        if alg & ns.FLOAT and decimal != '.':
            switch_decimal = r'(?<=[0-9]){decimal}|{decimal}(?=[0-9])'
            switch_decimal = switch_decimal.format(decimal=decimal_re)
            switch_decimal = re.compile(switch_decimal)
            function_chain.append(partial(switch_decimal.sub, '.'))

    # Return the chained functions.
    return chain_functions(function_chain)
337 | ||
338 | ||
def _string_component_transform_factory(alg):
    """
    Given a set of natsort algorithms, return the function to operate
    on the post-split strings according to the user's request.

    The result is fast_float (for ns.FLOAT) or fast_int with a 'key'
    bound to the string transformations requested by `alg`, if any.
    """
    locale_alpha = alg & ns.LOCALEALPHA

    # Collect the transformations to apply to non-numeric components.
    string_steps = []
    if (alg & ns.GROUPLETTERS) or (locale_alpha and alg & ns._DUMB):
        string_steps.append(_groupletters)
    if locale_alpha:
        string_steps.append(get_strxfrm())

    options = {}
    if string_steps:
        options['key'] = chain_functions(string_steps)

    if not alg & ns.FLOAT:
        return partial(fast_int, **options)

    # Only the float conversion is given a NaN replacement.
    options['nan'] = float('+inf') if alg & ns.NANLAST else float('-inf')
    return partial(fast_float, **options)
364 | ||
365 | ||
def _final_data_transform_factory(alg, sep, pre_sep):
    """
    Given a set of natsort algorithms, return the function to operate
    on the post-parsed strings according to the user's request.

    The returned function takes the split components and the original
    string and returns the final tuple.
    """
    if not (alg & ns.UNGROUPLETTERS and alg & ns.LOCALEALPHA):
        return lambda split_val, val: tuple(split_val)

    if alg & ns._DUMB and alg & ns.LOWERCASEFIRST:
        transform = methodcaller('swapcase')
    else:
        transform = _no_op

    def func(split_val, val, transform=transform):
        """
        Return a two-element tuple: a one-element tuple holding the
        leading sort token, followed by the tuple of split components.
        The token is `pre_sep` when the components start with `sep`,
        otherwise the (possibly case-swapped) first character of `val`.
        This is used to perform gross sorting by the first letter.
        """
        parts = tuple(split_val)
        if not parts:
            return (), ()
        lead = pre_sep if parts[0] == sep else transform(val[0])
        return (lead,), parts

    return func
392 | ||
393 | ||
394 | def _groupletters(x, _low=methodcaller('casefold' if NEWPY else 'lower')): | |
395 | """Double all characters, making doubled letters lowercase.""" | |
396 | return ''.join(ichain.from_iterable((_low(y), y) for y in x)) | |
397 | ||
398 | ||
def chain_functions(functions):
    """
    Combine single-argument functions into one function.

    The functions are applied in the order given; each one receives the
    output of the function before it.

    Parameters
    ----------
    functions : list
        A list of single-argument functions to chain together.

    Returns
    -------
    A single argument function.

    Examples
    --------
    Chain several functions together!

        >>> funcs = [lambda x: x * 4, len, lambda x: x + 5]
        >>> func = chain_functions(funcs)
        >>> func('hey')
        17

    """
    functions = list(functions)
    count = len(functions)
    if count == 0:
        # Nothing to apply.
        return _no_op
    if count == 1:
        # No wrapper is needed for a single function.
        return functions[0]

    def apply_next(result, function):
        return function(result)

    # The argument of the returned partial becomes the initial value of
    # the reduction. See https://stackoverflow.com/a/39123400/1399279
    return partial(reduce, apply_next, functions)
102 | 433 | |
103 | 434 | |
104 | 435 | def _do_decoding(s, encoding): |
109 | 440 | raise |
110 | 441 | except (AttributeError, TypeError): |
111 | 442 | return s |
443 | ||
444 | ||
445 | def _path_splitter(s, _d_match=re.compile(r'\.\d').match): | |
446 | """Split a string into its path components. Assumes a string is a path.""" | |
447 | # If a PathLib Object, use it's functionality to perform the split. | |
448 | if has_pathlib and isinstance(s, PurePath): | |
449 | s = py23_str(s) | |
450 | path_parts = deque() | |
451 | p_appendleft = path_parts.appendleft | |
452 | # Continue splitting the path from the back until we have reached | |
453 | # '..' or '.', or until there is nothing left to split. | |
454 | path_location = s | |
455 | while path_location != os_curdir and path_location != os_pardir: | |
456 | parent_path = path_location | |
457 | path_location, child_path = path_split(parent_path) | |
458 | if path_location == parent_path: | |
459 | break | |
460 | p_appendleft(child_path) | |
461 | ||
462 | # This last append is the base path. | |
463 | # Only append if the string is non-empty. | |
464 | if path_location: | |
465 | p_appendleft(path_location) | |
466 | ||
467 | # Now, split off the file extensions using a similar method to above. | |
468 | # Continue splitting off file extensions until we reach a decimal number | |
469 | # or there are no more extensions. | |
470 | # We are not using built-in functionality of PathLib here because of | |
471 | # the recursive splitting up to a decimal. | |
472 | base = path_parts.pop() | |
473 | base_parts = deque() | |
474 | b_appendleft = base_parts.appendleft | |
475 | while True: | |
476 | front = base | |
477 | base, ext = path_splitext(front) | |
478 | if _d_match(ext) or not ext: | |
479 | # Reset base to before the split if the split is invalid. | |
480 | base = front | |
481 | break | |
482 | b_appendleft(ext) | |
483 | b_appendleft(base) | |
484 | ||
485 | # Return the split parent paths and then the split basename. | |
486 | return ichain(path_parts, base_parts) | |
112 | 487 | |
113 | 488 | |
114 | 489 | def _args_to_enum(**kwargs): |
122 | 497 | msg = "The 'number_type' argument is deprecated as of 3.5.0, " |
123 | 498 | msg += "please use 'alg=ns.FLOAT', 'alg=ns.INT', or 'alg=ns.VERSION'" |
124 | 499 | warn(msg, DeprecationWarning) |
125 | alg |= (_ns['FLOAT'] * bool(kwargs['number_type'] is float)) | |
126 | alg |= (_ns['INT'] * bool(kwargs['number_type'] in (int, None))) | |
127 | alg |= (_ns['SIGNED'] * (kwargs['number_type'] not in (float, None))) | |
500 | alg |= (ns.FLOAT * bool(kwargs['number_type'] is float)) | |
501 | alg |= (ns.INT * bool(kwargs['number_type'] in (int, None))) | |
502 | alg |= (ns.SIGNED * (kwargs['number_type'] not in (float, None))) | |
128 | 503 | if 'signed' in kwargs and kwargs['signed'] is not None: |
129 | 504 | msg = "The 'signed' argument is deprecated as of 3.5.0, " |
130 | 505 | msg += "please use 'alg=ns.SIGNED'." |
131 | 506 | warn(msg, DeprecationWarning) |
132 | alg |= (_ns['SIGNED'] * bool(kwargs['signed'])) | |
507 | alg |= (ns.SIGNED * bool(kwargs['signed'])) | |
133 | 508 | if 'exp' in kwargs and kwargs['exp'] is not None: |
134 | 509 | msg = "The 'exp' argument is deprecated as of 3.5.0, " |
135 | 510 | msg += "please use 'alg=ns.NOEXP'." |
136 | 511 | warn(msg, DeprecationWarning) |
137 | alg |= (_ns['NOEXP'] * (not kwargs['exp'])) | |
512 | alg |= (ns.NOEXP * (not kwargs['exp'])) | |
138 | 513 | if 'as_path' in kwargs and kwargs['as_path'] is not None: |
139 | 514 | msg = "The 'as_path' argument is deprecated as of 3.5.0, " |
140 | 515 | msg += "please use 'alg=ns.PATH'." |
141 | 516 | warn(msg, DeprecationWarning) |
142 | alg |= (_ns['PATH'] * kwargs['as_path']) | |
143 | if 'py3_safe' in kwargs and kwargs['py3_safe'] is not None: | |
144 | msg = "The 'py3_safe' argument is deprecated as of 3.5.0, " | |
145 | msg += "please use 'alg=ns.TYPESAFE'." | |
146 | warn(msg, DeprecationWarning) | |
147 | alg |= (_ns['TYPESAFE'] * kwargs['py3_safe']) | |
517 | alg |= (ns.PATH * kwargs['as_path']) | |
148 | 518 | return alg |
149 | ||
150 | ||
def _number_extracter(s, regex, numconv, py3_safe, use_locale, group_letters):
    """
    Split the input string into its string and number components.

    `regex` splits the string, `numconv` converts the numeric pieces,
    and the remaining flags select locale conversion, letter grouping
    and the insertion of separators between adjacent numbers.
    """
    checker = _conv_to_check[numconv]
    conv_check = (numconv, checker)

    # Split the input string by numbers and discard empty strings.
    # If the input is not a string, TypeError is raised.
    pieces = [x for x in regex.split(s) if x]

    # Convert the numbers to numbers, and leave strings as strings,
    # taking into account locale and letter grouping if needed.
    if use_locale:
        parsed = [locale_convert(x, conv_check, group_letters) for x in pieces]
    elif group_letters:
        parsed = [grouper(x, conv_check) for x in pieces]
    else:
        parsed = [numconv(x) for x in pieces]

    # Return empty list for empty results.
    if not parsed:
        return []

    # If the list begins with a number, lead with an empty string.
    # This is used to get around the "unorderable types" issue.
    if checker(parsed[0], num_only=True):
        parsed.insert(0, null_string if use_locale else '')

    # _py3_safe inserts "" between adjacent numbers. It must be requested
    # specifically because it is expensive to call.
    if py3_safe:
        return _py3_safe(parsed, use_locale, checker)
    return parsed
181 | ||
182 | ||
183 | def _path_splitter(s, _d_match=re.compile(r'\.\d').match): | |
184 | """Split a string into its path components. Assumes a string is a path.""" | |
185 | path_parts = [] | |
186 | p_append = path_parts.append | |
187 | # Convert a pathlib PurePath object to a string. | |
188 | if has_pathlib and isinstance(s, PurePath): | |
189 | path_location = str(s) | |
190 | else: # pragma: no cover | |
191 | path_location = s | |
192 | ||
193 | # Continue splitting the path from the back until we have reached | |
194 | # '..' or '.', or until there is nothing left to split. | |
195 | while path_location != curdir and path_location != pardir: | |
196 | parent_path = path_location | |
197 | path_location, child_path = split(parent_path) | |
198 | if path_location == parent_path: | |
199 | break | |
200 | p_append(child_path) | |
201 | ||
202 | # This last append is the base path. | |
203 | # Only append if the string is non-empty. | |
204 | if path_location: | |
205 | p_append(path_location) | |
206 | ||
207 | # We created this list in reversed order, so we now correct the order. | |
208 | path_parts.reverse() | |
209 | ||
210 | # Now, split off the file extensions using a similar method to above. | |
211 | # Continue splitting off file extensions until we reach a decimal number | |
212 | # or there are no more extensions. | |
213 | base = path_parts.pop() | |
214 | base_parts = [] | |
215 | b_append = base_parts.append | |
216 | while True: | |
217 | front = base | |
218 | base, ext = splitext(front) | |
219 | if _d_match(ext) or not ext: | |
220 | # Reset base to before the split if the split is invalid. | |
221 | base = front | |
222 | break | |
223 | b_append(ext) | |
224 | b_append(base) | |
225 | base_parts.reverse() | |
226 | ||
227 | # Return the split parent paths and then the split basename. | |
228 | return path_parts + base_parts | |
229 | ||
230 | ||
231 | def _py3_safe(parsed_list, use_locale, check): | |
232 | """Insert '' between two numbers.""" | |
233 | length = len(parsed_list) | |
234 | if length < 2: | |
235 | return parsed_list | |
236 | else: | |
237 | new_list = [parsed_list[0]] | |
238 | nl_append = new_list.append | |
239 | for before, after in py23_zip(islice(parsed_list, 0, length-1), | |
240 | islice(parsed_list, 1, None)): | |
241 | if check(before, num_only=True) and check(after, num_only=True): | |
242 | nl_append(null_string if use_locale else '') | |
243 | nl_append(after) | |
244 | return new_list | |
245 | ||
246 | ||
def _fix_nan(ret, alg):
    """
    Return `ret` as a tuple with every NaN replaced by an infinity.

    The replacement is +infinity when NANLAST is set in `alg` and
    -infinity otherwise; all other elements are kept unchanged.
    """
    def replace(item):
        if not (isfloat(item, num_only=True) and isnan(item)):
            return item
        return float('+inf') if alg & _ns['NANLAST'] else float('-inf')

    return tuple(replace(item) for item in ret)
259 | ||
260 | ||
261 | def _natsort_key(val, key, alg): | |
262 | """\ | |
263 | Key to sort strings and numbers naturally. | |
264 | ||
265 | It works by separating out the numbers from the strings. This function for | |
266 | internal use only. See the natsort_keygen documentation for details of each | |
267 | parameter. | |
268 | ||
269 | Parameters | |
270 | ---------- | |
271 | val : {str, unicode} | |
272 | key : callable | |
273 | alg : ns enum | |
274 | ||
275 | Returns | |
276 | ------- | |
277 | out : tuple | |
278 | The modified value with numbers extracted. | |
279 | ||
280 | """ | |
281 | ||
282 | # Convert the arguments to the proper input tuple | |
283 | try: | |
284 | use_locale = alg & _ns['LOCALE'] | |
285 | inp_options = (alg & _NUMBER_ALGORITHMS, | |
286 | localeconv()['decimal_point'] if use_locale else '.') | |
287 | except TypeError: | |
288 | msg = "_natsort_key: 'alg' argument must be from the enum 'ns'" | |
289 | raise ValueError(msg+', got {0}'.format(py23_str(alg))) | |
290 | ||
291 | # Get the proper regex and conversion function. | |
292 | try: | |
293 | regex, num_function = _regex_and_num_function_chooser[inp_options] | |
294 | except KeyError: # pragma: no cover | |
295 | if inp_options[1] not in ('.', ','): # pragma: no cover | |
296 | raise ValueError("_natsort_key: currently natsort only supports " | |
297 | "the decimal separators '.' and ','. " | |
298 | "Please file a bug report.") | |
299 | else: | |
300 | raise | |
301 | else: | |
302 | # Apply key if needed. | |
303 | if key is not None: | |
304 | val = key(val) | |
305 | ||
306 | # If this is a path, convert it. | |
307 | # An AttrubuteError is raised if not a string. | |
308 | split_as_path = False | |
309 | if alg & _ns['PATH']: | |
310 | try: | |
311 | val = _path_splitter(val) | |
312 | except AttributeError: | |
313 | pass | |
314 | else: | |
315 | # Record that this string was split as a path so that | |
316 | # we don't set PATH in the recursive call. | |
317 | split_as_path = True | |
318 | ||
319 | # Assume the input are strings, which is the most common case. | |
320 | # Apply the string modification if needed. | |
321 | orig_val = val | |
322 | try: | |
323 | lowfirst = alg & _ns['LOWERCASEFIRST'] | |
324 | dumb = dumb_sort() if use_locale else False | |
325 | if use_locale and dumb and not lowfirst: | |
326 | val = val.swapcase() # Compensate for bad locale lib. | |
327 | elif lowfirst and not (use_locale and dumb): | |
328 | val = val.swapcase() | |
329 | if alg & _ns['IGNORECASE']: | |
330 | val = val.casefold() if PY_VERSION >= 3.3 else val.lower() | |
331 | gl = alg & _ns['GROUPLETTERS'] | |
332 | ret = tuple(_number_extracter(val, | |
333 | regex, | |
334 | num_function, | |
335 | alg & _ns['TYPESAFE'], | |
336 | use_locale, | |
337 | gl or (use_locale and dumb))) | |
338 | # Handle NaN. | |
339 | if any(isfloat(x, num_only=True) and isnan(x) for x in ret): | |
340 | ret = _fix_nan(ret, alg) | |
341 | # For UNGROUPLETTERS, so the high level grouping can occur | |
342 | # based on the first letter of the string. | |
343 | # Do no locale transformation of the characters. | |
344 | if use_locale and alg & _ns['UNGROUPLETTERS']: | |
345 | if not ret: | |
346 | return (ret, ret) | |
347 | elif ret[0] == null_string: | |
348 | return ((b'' if use_pyicu else '',), ret) | |
349 | elif dumb: | |
350 | if lowfirst: | |
351 | return ((orig_val[0].swapcase(),), ret) | |
352 | else: | |
353 | return ((orig_val[0],), ret) | |
354 | else: | |
355 | return ((val[0],), ret) | |
356 | else: | |
357 | return ret | |
358 | except (TypeError, AttributeError): | |
359 | # Check if it is a bytes type, and if so return as a | |
360 | # one element tuple. | |
361 | if type(val) in (bytes,): | |
362 | return (val.lower(),) if alg & _ns['IGNORECASE'] else (val,) | |
363 | # If not strings, assume it is an iterable that must | |
364 | # be parsed recursively. Do not apply the key recursively. | |
365 | # If this string was split as a path, turn off 'PATH'. | |
366 | try: | |
367 | was_path = alg & _ns['PATH'] | |
368 | newalg = alg & _ALL_BUT_PATH | |
369 | newalg |= (was_path * (not split_as_path)) | |
370 | return tuple([_natsort_key(x, None, newalg) for x in val]) | |
371 | # If there is still an error, it must be a number. | |
372 | # Return as-is, with a leading empty string. | |
373 | except TypeError: | |
374 | n = null_string if use_locale else '' | |
375 | if isfloat(val, num_only=True) and isnan(val): | |
376 | val = _fix_nan([val], alg)[0] | |
377 | return ((n, val,),) if alg & _ns['PATH'] else (n, val,) |
0 | [bumpversion] | |
1 | current_version = 5.3.3 | |
2 | commit = True | |
3 | tag = True | |
4 | tag_name = {new_version} | |
5 | ||
6 | [metadata] | |
7 | author = Seth M. Morton | |
8 | author_email = drtuba78@gmail.com | |
9 | url = https://github.com/SethMMorton/natsort | |
10 | description = Simple yet flexible natural sorting in Python. | |
11 | long_description = file: README.rst | |
12 | license = MIT | |
13 | classifiers = | |
14 | Development Status :: 5 - Production/Stable | |
15 | Intended Audience :: Developers | |
16 | Intended Audience :: Science/Research | |
17 | Intended Audience :: System Administrators | |
18 | Intended Audience :: Information Technology | |
19 | Intended Audience :: Financial and Insurance Industry | |
20 | Operating System :: OS Independent | |
21 | License :: OSI Approved :: MIT License | |
22 | Natural Language :: English | |
23 | Programming Language :: Python :: 2 | |
24 | Programming Language :: Python :: 2.6 | |
25 | Programming Language :: Python :: 2.7 | |
26 | Programming Language :: Python :: 3 | |
27 | Programming Language :: Python :: 3.3 | |
28 | Programming Language :: Python :: 3.4 | |
29 | Programming Language :: Python :: 3.5 | |
30 | Programming Language :: Python :: 3.6 | |
31 | Topic :: Scientific/Engineering :: Information Analysis | |
32 | Topic :: Utilities | |
33 | Topic :: Text Processing | |
34 | ||
0 | 35 | [bdist_wheel] |
1 | 36 | universal = 1 |
2 | 37 | |
3 | 38 | [sdist] |
4 | formats = zip,gztar | |
39 | formats = gztar | |
5 | 40 | |
6 | [pytest] | |
41 | [bumpversion:file:setup.py] | |
42 | ||
43 | [bumpversion:file:natsort/_version.py] | |
44 | ||
45 | [bumpversion:file:docs/source/conf.py] | |
46 | ||
47 | [bumpversion:file:docs/source/changelog.rst] | |
48 | search = X.X.X | |
49 | replace = {new_version} | |
50 | ||
51 | [tool:pytest] | |
7 | 52 | flakes-ignore = |
8 | natsort/compat/py23.py UndefinedName | |
9 | natsort/__init__.py UnusedImport | |
10 | natsort/compat/* UnusedImport | |
11 | docs/source/conf.py ALL | |
12 | test_natsort/test_natsort.py UnusedImport RedefinedWhileUnused | |
13 | test_natsort/test_locale_help.py UnusedImport RedefinedWhileUnused | |
14 | test_natsort/compat/* UnusedImport | |
15 | ||
16 | pep8ignore = | |
17 | natsort/ns_enum.py E126 E241 E123 | |
18 | test_natsort/test_natsort.py E501 E241 E221 | |
19 | test_natsort/test_utils.py E501 E241 E221 | |
20 | test_natsort/test_locale_help.py E501 E241 E221 | |
21 | test_natsort/test_main.py E501 E241 E221 | |
22 | test_natsort/profile_natsorted.py ALL | |
23 | docs/source/conf.py ALL | |
53 | natsort/compat/py23.py UndefinedName | |
54 | natsort/__init__.py UnusedImport | |
55 | natsort/compat/* UnusedImport | |
56 | docs/source/conf.py ALL | |
57 | test_natsort/test_natsort.py UnusedImport RedefinedWhileUnused | |
58 | test_natsort/test_locale_help.py UnusedImport RedefinedWhileUnused | |
59 | test_natsort/compat/* UnusedImport | |
60 | pep8ignore = | |
61 | natsort/ns_enum.py E126 E241 E123 E221 | |
62 | test_natsort/test_*.py E501 E241 E221 | |
63 | test_natsort/test_natsort_keygen.py E501 E241 E221 E701 | |
64 | test_natsort/profile_natsorted.py ALL | |
65 | docs/source/conf.py ALL | |
24 | 66 | |
25 | 67 | [flake8] |
26 | 68 | max-line-length = 160 |
27 | 69 | ignore = E231,E302 |
70 |
0 | 0 | #! /usr/bin/env python |
1 | 1 | |
2 | # Std. lib imports | |
3 | import re | |
4 | import sys | |
5 | from os.path import join | |
6 | ||
7 | # Non-std lib imports | |
8 | from setuptools import setup | |
9 | from setuptools.command.test import test as TestCommand | |
10 | ||
11 | ||
12 | class PyTest(TestCommand): | |
13 | """Custom command to run pytest on all code.""" | |
14 | ||
15 | def finalize_options(self): | |
16 | TestCommand.finalize_options(self) | |
17 | self.test_args = [] | |
18 | self.test_suite = True | |
19 | ||
20 | def run_tests(self): | |
21 | # import here, cause outside the eggs aren't loaded | |
22 | import pytest | |
23 | err1 = pytest.main(['--cov', 'natsort', | |
24 | '--cov-report', 'term-missing', | |
25 | '--flakes', | |
26 | '--pep8', | |
27 | '-s', | |
28 | # '--failed', | |
29 | # '-v', | |
30 | ]) | |
31 | err2 = pytest.main(['--doctest-modules', 'natsort']) | |
32 | err3 = pytest.main(['README.rst', | |
33 | 'docs/source/intro.rst', | |
34 | 'docs/source/examples.rst']) | |
35 | return err1 | err2 | err3 | |
36 | ||
37 | ||
38 | # Read the natsort/_version.py file for the module version number | |
39 | VERSIONFILE = join('natsort', '_version.py') | |
40 | versionsearch = re.compile(r"^__version__ = ['\"]([^'\"]*)['\"]") | |
41 | with open(VERSIONFILE, "rt") as fl: | |
42 | for line in fl: | |
43 | m = versionsearch.search(line) | |
44 | if m: | |
45 | VERSION = m.group(1) | |
46 | break | |
47 | else: | |
48 | s = "Unable to locate version string in {0}" | |
49 | raise RuntimeError(s.format(VERSIONFILE)) | |
50 | ||
51 | # Read in the documentation for the long_description | |
52 | DESCRIPTION = 'Sort lists naturally' | |
53 | try: | |
54 | with open('README.rst') as fl: | |
55 | LONG_DESCRIPTION = fl.read() | |
56 | except IOError: | |
57 | LONG_DESCRIPTION = DESCRIPTION | |
58 | ||
59 | # The argparse module was introduced in python 2.7 or python 3.2 | |
60 | REQUIRES = 'argparse' if sys.version[:3] in ('2.6', '3.0', '3.1') else '' | |
61 | ||
62 | # Testing needs pytest, and mock if less than python 3.3 | |
63 | TESTS_REQUIRE = ['pytest', 'pytest-pep8', 'pytest-flakes', | |
64 | 'pytest-cov', 'pytest-cache', 'hypothesis'] | |
65 | ||
66 | if (sys.version.startswith('2') or | |
67 | (sys.version.startswith('3') and int(sys.version.split('.')[1]) < 3)): | |
68 | TESTS_REQUIRE.append('mock') | |
69 | if (sys.version.startswith('2') or | |
70 | (sys.version.startswith('3') and int(sys.version.split('.')[1]) < 4)): | |
71 | TESTS_REQUIRE.append('pathlib') | |
72 | ||
73 | # The setup parameters | |
2 | from setuptools import setup, find_packages | |
74 | 3 | setup( |
75 | 4 | name='natsort', |
76 | version=VERSION, | |
77 | author='Seth M. Morton', | |
78 | author_email='drtuba78@gmail.com', | |
79 | url='https://github.com/SethMMorton/natsort', | |
80 | license='MIT', | |
81 | install_requires=REQUIRES, | |
82 | packages=['natsort', 'natsort.compat'], | |
5 | version='5.3.3', | |
6 | packages=find_packages(), | |
7 | install_requires=["argparse; python_version < '2.7'"], | |
83 | 8 | entry_points={'console_scripts': ['natsort = natsort.__main__:main']}, |
84 | tests_require=TESTS_REQUIRE, | |
85 | cmdclass={'test': PyTest}, | |
86 | description=DESCRIPTION, | |
87 | long_description=LONG_DESCRIPTION, | |
88 | classifiers=( | |
89 | 'Development Status :: 5 - Production/Stable', | |
90 | 'Intended Audience :: Developers', | |
91 | 'Intended Audience :: Science/Research', | |
92 | 'Intended Audience :: System Administrators', | |
93 | 'Intended Audience :: Information Technology', | |
94 | 'Operating System :: OS Independent', | |
95 | 'License :: OSI Approved :: MIT License', | |
96 | 'Natural Language :: English', | |
97 | 'Programming Language :: Python :: 2.6', | |
98 | 'Programming Language :: Python :: 2.7', | |
99 | 'Programming Language :: Python :: 3', | |
100 | 'Topic :: Scientific/Engineering :: Information Analysis', | |
101 | 'Topic :: Utilities', | |
102 | ) | |
9 | extras_require={ | |
10 | 'fast': ["fastnumbers >= 2.0.0; python_version > '2.6'"], | |
11 | 'icu': ["PyICU >= 1.0.0"] | |
12 | } | |
103 | 13 | ) |
0 | # -*- coding: utf-8 -*- | |
1 | from __future__ import ( | |
2 | print_function, | |
3 | division, | |
4 | unicode_literals, | |
5 | absolute_import | |
6 | ) | |
7 | import sys | |
8 | import compat.mock | |
9 | ||
10 | major_minor = sys.version_info[:2] | |
11 | ||
12 | # Use hypothesis if not on python 2.6. | |
13 | if major_minor != (2, 6): | |
14 | use_hypothesis = True | |
15 | from hypothesis import assume, given, example | |
16 | from hypothesis.specifiers import ( | |
17 | integers_in_range, | |
18 | integers_from, | |
19 | sampled_from, | |
20 | ) | |
21 | # Otherwise mock these imports, because hypothesis | |
22 | # is incompatible with python 2.6. | |
23 | else: | |
24 | example = integers_in_range = integers_from = \ | |
25 | sampled_from = assume = given = compat.mock.MagicMock() | |
26 | use_hypothesis = False |
9 | 9 | import locale |
10 | 10 | |
11 | 11 | # Local imports |
12 | from natsort.locale_help import use_pyicu | |
13 | from natsort.compat.py23 import py23_str | |
12 | from natsort.compat.py23 import py23_str, py23_unichr, py23_range | |
14 | 13 | |
15 | 14 | |
16 | 15 | def load_locale(x): |
17 | 16 | """ Convenience to load a locale, trying ISO8859-1 first.""" |
18 | 17 | try: |
19 | 18 | locale.setlocale(locale.LC_ALL, str('{0}.ISO8859-1'.format(x))) |
20 | except: | |
19 | except locale.Error: | |
21 | 20 | locale.setlocale(locale.LC_ALL, str('{0}.UTF-8'.format(x))) |
22 | 21 | |
23 | 22 | # Check if de_DE is installed. |
27 | 26 | except locale.Error: |
28 | 27 | has_locale_de_DE = False |
29 | 28 | |
30 | # Make a function that will return the appropriate | |
31 | # strxfrm for the current locale. | |
32 | if use_pyicu: | |
33 | from natsort.locale_help import get_pyicu_transform | |
34 | from locale import getlocale | |
35 | ||
36 | def get_strxfrm(): | |
37 | return get_pyicu_transform(getlocale()) | |
38 | else: | |
39 | from natsort.locale_help import strxfrm | |
40 | ||
41 | def get_strxfrm(): | |
42 | return strxfrm | |
43 | ||
44 | 29 | # Depending on the python version, use lower or casefold |
45 | 30 | # to make a string lowercase. |
46 | 31 | try: |
47 | 32 | low = py23_str.casefold |
48 | 33 | except AttributeError: |
49 | 34 | low = py23_str.lower |
35 | ||
36 | # There are some unicode values that are known failures on BSD systems | |
37 | # that have nothing to do with natsort (a ValueError is raised by strxfrm). | |
38 | # Let's filter them out. | |
39 | try: | |
40 | bad_uni_chars = set(py23_unichr(x) for x in py23_range(0X10fefd, | |
41 | 0X10ffff+1)) | |
42 | except ValueError: | |
43 | # Narrow unicode build... no worries. | |
44 | bad_uni_chars = set() |
4 | 4 | """ |
5 | 5 | from __future__ import print_function |
6 | 6 | import cProfile |
7 | import random | |
8 | 7 | import sys |
9 | 8 | |
10 | 9 | sys.path.insert(0, '.') |
11 | from natsort import natsorted, index_natsorted | |
10 | from natsort import natsort_keygen, ns | |
12 | 11 | from natsort.compat.py23 import py23_range |
12 | import locale | |
13 | locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') | |
14 | ||
15 | # Samples to parse | |
16 | number = 14695498 | |
17 | int_string = '43493' | |
18 | float_string = '-434.93e7' | |
19 | plain_string = 'hello world' | |
20 | fancy_string = '7abba9342fdab' | |
21 | a_path = '/p/Folder (1)/file (1).tar.gz' | |
22 | some_bytes = b'these are bytes' | |
23 | a_list = ['hello', 'goodbye', '74'] | |
24 | ||
25 | basic_key = natsort_keygen() | |
26 | real_key = natsort_keygen(alg=ns.REAL) | |
27 | path_key = natsort_keygen(alg=ns.PATH) | |
28 | locale_key = natsort_keygen(alg=ns.LOCALE) | |
13 | 29 | |
14 | 30 | |
15 | # Sample lists to sort | |
16 | nums = random.sample(py23_range(10000), 1000) | |
17 | nstr = list(map(str, random.sample(py23_range(10000), 1000))) | |
18 | astr = ['a'+x+'num' for x in map(str, random.sample(py23_range(10000), 1000))] | |
19 | tstr = [['a'+x, 'a-'+x] | |
20 | for x in map(str, random.sample(py23_range(10000), 1000))] | |
21 | cstr = ['a'+x+'-'+x for x in map(str, random.sample(py23_range(10000), 1000))] | |
31 | def prof_time_to_generate(): | |
32 | print('*** Generate Plain Key ***') | |
33 | for _ in py23_range(100000): | |
34 | natsort_keygen() | |
35 | cProfile.run('prof_time_to_generate()', sort='time') | |
22 | 36 | |
23 | 37 | |
24 | def prof_nums(a): | |
25 | print('*** Basic Call, Numbers ***') | |
26 | for _ in py23_range(1000): | |
27 | natsorted(a) | |
28 | cProfile.run('prof_nums(nums)', sort='time') | |
29 | ||
30 | ||
31 | def prof_num_str(a): | |
32 | print('*** Basic Call, Numbers as Strings ***') | |
33 | for _ in py23_range(1000): | |
34 | natsorted(a) | |
35 | cProfile.run('prof_num_str(nstr)', sort='time') | |
36 | ||
37 | ||
38 | def prof_str(a): | |
39 | print('*** Basic Call, Strings ***') | |
40 | for _ in py23_range(1000): | |
41 | natsorted(a) | |
42 | cProfile.run('prof_str(astr)', sort='time') | |
43 | ||
44 | ||
45 | def prof_str_index(a): | |
46 | print('*** Basic Index Call ***') | |
47 | for _ in py23_range(1000): | |
48 | index_natsorted(a) | |
49 | cProfile.run('prof_str_index(astr)', sort='time') | |
50 | ||
51 | ||
52 | def prof_nested(a): | |
53 | print('*** Basic Call, Nested Strings ***') | |
54 | for _ in py23_range(1000): | |
55 | natsorted(a) | |
56 | cProfile.run('prof_nested(tstr)', sort='time') | |
57 | ||
58 | ||
59 | def prof_str_noexp(a): | |
60 | print('*** No-Exp Call ***') | |
61 | for _ in py23_range(1000): | |
62 | natsorted(a, exp=False) | |
63 | cProfile.run('prof_str_noexp(astr)', sort='time') | |
64 | ||
65 | ||
66 | def prof_str_unsigned(a): | |
67 | print('*** Unsigned Call ***') | |
68 | for _ in py23_range(1000): | |
69 | natsorted(a, signed=False) | |
70 | cProfile.run('prof_str_unsigned(astr)', sort='time') | |
71 | ||
72 | ||
73 | def prof_str_unsigned_noexp(a): | |
74 | print('*** Unsigned No-Exp Call ***') | |
75 | for _ in py23_range(1000): | |
76 | natsorted(a, signed=False, exp=False) | |
77 | cProfile.run('prof_str_unsigned_noexp(astr)', sort='time') | |
78 | ||
79 | ||
80 | def prof_str_asint(a): | |
81 | print('*** Int Call ***') | |
82 | for _ in py23_range(1000): | |
83 | natsorted(a, number_type=int) | |
84 | cProfile.run('prof_str_asint(astr)', sort='time') | |
85 | ||
86 | ||
87 | def prof_str_asint_unsigned(a): | |
88 | print('*** Unsigned Int (Versions) Call ***') | |
89 | for _ in py23_range(1000): | |
90 | natsorted(a, number_type=int, signed=False) | |
91 | cProfile.run('prof_str_asint_unsigned(astr)', sort='time') | |
92 | ||
93 | ||
94 | def prof_str_key(a): | |
95 | print('*** Basic Call With Key ***') | |
96 | for _ in py23_range(1000): | |
97 | natsorted(a, key=lambda x: x.upper()) | |
98 | cProfile.run('prof_str_key(astr)', sort='time') | |
99 | ||
100 | ||
101 | def prof_str_index_key(a): | |
102 | print('*** Basic Index Call With Key ***') | |
103 | for _ in py23_range(1000): | |
104 | index_natsorted(a, key=lambda x: x.upper()) | |
105 | cProfile.run('prof_str_index_key(astr)', sort='time') | |
106 | ||
107 | ||
108 | def prof_str_unorderable(a): | |
109 | print('*** Basic Index Call, "Unorderable" ***') | |
110 | for _ in py23_range(1000): | |
111 | natsorted(a) | |
112 | cProfile.run('prof_str_unorderable(cstr)', sort='time') | |
38 | def prof_parsing(a, msg, key=basic_key): | |
39 | print(msg) | |
40 | for _ in py23_range(100000): | |
41 | key(a) | |
42 | cProfile.run('prof_parsing(int_string, "*** Basic Call, Int as String ***")', sort='time') | |
43 | cProfile.run('prof_parsing(float_string, "*** Basic Call, Float as String ***")', sort='time') | |
44 | cProfile.run('prof_parsing(float_string, "*** Real Call ***", real_key)', sort='time') | |
45 | cProfile.run('prof_parsing(number, "*** Basic Call, Number ***")', sort='time') | |
46 | cProfile.run('prof_parsing(fancy_string, "*** Basic Call, Mixed String ***")', sort='time') | |
47 | cProfile.run('prof_parsing(some_bytes, "*** Basic Call, Byte String ***")', sort='time') | |
48 | cProfile.run('prof_parsing(a_path, "*** Path Call ***", path_key)', sort='time') | |
49 | cProfile.run('prof_parsing(a_list, "*** Basic Call, Recursive ***")', sort='time') | |
50 | cProfile.run('prof_parsing("434,930,000 dollars", "*** Locale Call ***", locale_key)', sort='time') |
2 | 2 | from __future__ import unicode_literals |
3 | 3 | |
4 | 4 | import unicodedata |
5 | from natsort.compat.py23 import PY_VERSION | |
5 | import collections | |
6 | import itertools | |
7 | import functools | |
8 | from natsort.unicode_numbers import decimals | |
9 | from natsort.compat.py23 import PY_VERSION, py23_zip | |
6 | 10 | |
7 | 11 | if PY_VERSION >= 3.0: |
8 | 12 | long = int |
9 | 13 | |
10 | ||
11 | def int_splitter(x, signed, safe, sep): | |
14 | triple_none = None, None, None | |
15 | _sentinel = object() | |
16 | SplitElement = collections.namedtuple('SplitElement', | |
17 | ['isnum', 'val', 'isuni']) | |
18 | ||
19 | ||
20 | def int_splitter(iterable, signed, sep): | |
12 | 21 | """Alternate (slow) method to split a string into numbers.""" |
13 | if not x: | |
14 | return [] | |
15 | all_digits = set('0123456789') | |
16 | full_list, strings, nums = [], [], [] | |
17 | input_len = len(x) | |
18 | for i, char in enumerate(x): | |
19 | # If this character is a sign and the next is a number, | |
20 | # start a new number. | |
21 | if (i+1 < input_len and signed and | |
22 | (char in '-+') and (x[i+1] in all_digits)): | |
23 | # Reset any current string or number. | |
24 | if strings: | |
25 | full_list.append(''.join(strings)) | |
26 | if nums: | |
27 | full_list.append(int(''.join(nums))) | |
28 | strings = [] | |
29 | nums = [char] | |
30 | # If this is a number, add to the number list. | |
31 | elif char in all_digits: | |
32 | nums.append(char) | |
33 | # Reset any string. | |
34 | if strings: | |
35 | full_list.append(''.join(strings)) | |
36 | strings = [] | |
37 | # If this is a unicode digit, append directly to the full list. | |
38 | elif char.isdigit(): | |
39 | # Reset any string or number. | |
40 | if strings: | |
41 | full_list.append(''.join(strings)) | |
42 | if nums: | |
43 | full_list.append(int(''.join(nums))) | |
44 | strings = [] | |
45 | nums = [] | |
46 | full_list.append(unicodedata.digit(char)) | |
47 | # Otherwise add to the string. | |
48 | else: | |
49 | strings.append(char) | |
50 | # Reset any number. | |
51 | if nums: | |
52 | full_list.append(int(''.join(nums))) | |
53 | nums = [] | |
54 | if nums: | |
55 | full_list.append(int(''.join(nums))) | |
56 | elif strings: | |
57 | full_list.append(''.join(strings)) | |
58 | if safe: | |
59 | full_list = sep_inserter(full_list, (int, long), sep) | |
60 | if type(full_list[0]) in (int, long): | |
61 | return [sep] + full_list | |
62 | else: | |
63 | return full_list | |
64 | ||
65 | ||
66 | def float_splitter(x, signed, exp, safe, sep): | |
22 | iterable = unicodedata.normalize('NFD', iterable) | |
23 | split_by_decimal = itertools.groupby(iterable, lambda a: a.isdigit()) | |
24 | split_by_decimal = refine_split_grouping(split_by_decimal) | |
25 | split = int_splitter_iter(split_by_decimal, signed) | |
26 | split = sep_inserter(split, sep) | |
27 | return tuple(add_leading_space_if_first_is_num(split, sep)) | |
28 | ||
29 | ||
30 | def float_splitter(iterable, signed, exp, sep): | |
67 | 31 | """Alternate (slow) method to split a string into numbers.""" |
68 | if not x: | |
69 | return [] | |
70 | all_digits = set('0123456789') | |
71 | full_list, strings, nums = [], [], [] | |
72 | input_len = len(x) | |
73 | for i, char in enumerate(x): | |
74 | # If this character is a sign and the next is a number, | |
75 | # start a new number. | |
76 | if (i+1 < input_len and | |
77 | (signed or (i > 1 and exp and x[i-1] in 'eE' and | |
78 | x[i-2] in all_digits)) and | |
79 | (char in '-+') and (x[i+1] in all_digits)): | |
80 | # Reset any current string or number. | |
81 | if strings: | |
82 | full_list.append(''.join(strings)) | |
83 | if nums and i > 0 and x[i-1] not in 'eE': | |
84 | full_list.append(float(''.join(nums))) | |
85 | nums = [char] | |
32 | ||
33 | def number_tester(x): | |
34 | return x.isdecimal() or unicodedata.numeric(x, None) is not None | |
35 | ||
36 | iterable = unicodedata.normalize('NFD', iterable) | |
37 | split_by_decimal = itertools.groupby(iterable, number_tester) | |
38 | split_by_decimal = peekable(refine_split_grouping(split_by_decimal)) | |
39 | split = float_splitter_iter(split_by_decimal, signed, exp) | |
40 | split = sep_inserter(split, sep) | |
41 | return tuple(add_leading_space_if_first_is_num(split, sep)) | |
42 | ||
43 | ||
44 | def refine_split_grouping(iterable): | |
45 | """Combines lists into strings, and separates unicode numbers from ASCII""" | |
46 | for isnum, values in iterable: | |
47 | values = list(values) | |
48 | # Further refine numbers into unicode and ASCII numeric characters. | |
49 | if isnum: | |
50 | num_grouped = group_unicode_and_ascii_numbers(values) | |
51 | for isuni, num_values in num_grouped: | |
52 | # If unicode, return one character at a time. | |
53 | if isuni: | |
54 | for u in num_values: | |
55 | yield SplitElement(True, u, True) | |
56 | # If ASCII, combine into a single multicharacter number. | |
57 | else: | |
58 | val = ''.join(num_values) | |
59 | yield SplitElement(True, val, False) | |
60 | ||
61 | else: | |
62 | # If non-numeric, combine into a single string. | |
63 | val = ''.join(values) | |
64 | yield SplitElement(False, val, False) | |
65 | ||
66 | ||
67 | def group_unicode_and_ascii_numbers( | |
68 | iterable, ascii_digits=frozenset(decimals + '0123456789') | |
69 | ): | |
70 | """ | |
71 | Use groupby to group ASCII and unicode numeric characters. | |
72 | Assumes all input is already all numeric characters. | |
73 | """ | |
74 | return itertools.groupby(iterable, lambda a: a not in ascii_digits) | |
75 | ||
76 | ||
77 | def int_splitter_iter(iterable, signed): | |
78 | """Split the input into integers and strings.""" | |
79 | for isnum, val, isuni in iterable: | |
80 | if isuni: | |
81 | yield unicodedata.digit(val) | |
82 | elif isnum: | |
83 | yield int(val) | |
84 | elif signed: | |
85 | for x in try_to_read_signed_integer(iterable, val): | |
86 | yield int(''.join(x)) if isinstance(x, list) else x | |
87 | else: | |
88 | yield val | |
89 | ||
90 | ||
91 | def float_splitter_iter(iterable, signed, exp): | |
92 | """Split the input into floats and strings.""" | |
93 | weird_check = ('-inf', '-infinity', '+inf', '+infinity', | |
94 | 'inf', 'infinity', 'nan', '-nan', '+nan') | |
95 | try_to_read_float_correctly = [ | |
96 | try_to_read_float, | |
97 | try_to_read_float_with_exp, | |
98 | functools.partial(try_to_read_signed_float_template, | |
99 | key=try_to_read_float), | |
100 | functools.partial(try_to_read_signed_float_template, | |
101 | key=try_to_read_float_with_exp), | |
102 | ][signed * 2 + exp * 1] # Choose the appropriate converter function. | |
103 | for isnum, val, isuni in iterable: | |
104 | if isuni: | |
105 | yield unicodedata.numeric(val) | |
106 | else: | |
107 | for x in try_to_read_float_correctly(iterable, isnum, val): | |
108 | if isinstance(x, list): | |
109 | yield float(''.join(x)) | |
110 | elif x.lower().strip(' \t\n\r\f\v') in weird_check: | |
111 | yield float(x) | |
112 | else: | |
113 | yield x | |
114 | ||
115 | ||
116 | def try_to_read_signed_integer(iterable, val): | |
117 | """ | |
118 | If the given string ends with +/-, attempt to return a signed int. | |
119 | Otherwise, return the string as-is. | |
120 | """ | |
121 | if val.endswith(('+', '-')): | |
122 | next_element = next(iterable, None) | |
123 | ||
124 | # Last element, return as-is. | |
125 | if next_element is None: | |
126 | yield val | |
127 | return | |
128 | ||
129 | # We know the next value in the sequence must be "isnum == True". | |
130 | # We just need to handle unicode or not. | |
131 | _, next_val, next_isuni = next_element | |
132 | ||
133 | # If unicode, don't apply sign and just return the val as-is | |
134 | # and convert the unicode character. | |
135 | if next_isuni: | |
136 | yield val | |
137 | yield unicodedata.digit(next_val) | |
138 | ||
139 | # If the val is *only* the sign, return only the number. | |
140 | elif val in ('-', '+'): | |
141 | yield [val, next_val] | |
142 | ||
143 | # Otherwise, remove the sign from the val and apply it to the number, | |
144 | # returning both. | |
145 | else: | |
146 | yield val[:-1] | |
147 | yield [val[-1], next_val] | |
148 | ||
149 | else: | |
150 | yield val | |
151 | ||
152 | ||
153 | def try_to_read_float(iterable, isnum, val): | |
154 | """ | |
155 | Try to read a string that matches num.num and return as a float. | |
156 | Otherwise return the input as found. | |
157 | """ | |
158 | # Extract what is coming next. | |
159 | next_isnum, next_val, next_isuni = iterable.peek(triple_none) | |
160 | ||
161 | # If a non-number was given, we can only accept a decimal point. | |
162 | if not isnum: | |
163 | ||
164 | # If the next value is None or not a non-uni number, return as-is. | |
165 | if next_val is None or not next_isnum or next_isuni: | |
166 | yield val | |
167 | ||
168 | # If this is the decimal point, add it to the number and return. | |
169 | elif val == '.': | |
170 | next(iterable) # To progress the iterator. | |
171 | yield [val, next_val] | |
172 | ||
173 | # If the val ends with the decimal point, split the decimal point | |
174 | # off the end of the string then place it to the front of the | |
175 | # iterable so that we can use it later. | |
176 | elif val.endswith('.'): | |
177 | iterable.push(SplitElement(False, val[-1], False)) | |
178 | yield val[:-1] | |
179 | ||
180 | # Otherwise, just return the val and move on. | |
181 | else: | |
182 | yield val | |
183 | ||
184 | # If a number, read the number then try to get the post-decimal part. | |
185 | else: | |
186 | ||
187 | # If the next element is not '.', return now. | |
188 | if next_val != '.': | |
189 | # If the next val starts with a '.', let's add that. | |
190 | if next_val is not None and next_val.startswith('.'): | |
191 | next(iterable) # To progress the iterator. | |
192 | iterable.push(SplitElement(False, next_val[1:], False)) | |
193 | yield [val, next_val[0]] | |
86 | 194 | else: |
87 | nums.append(char) | |
88 | strings = [] | |
89 | # If this is a number, add to the number list. | |
90 | elif char in all_digits: | |
91 | nums.append(char) | |
92 | # Reset any string. | |
93 | if strings: | |
94 | full_list.append(''.join(strings)) | |
95 | strings = [] | |
96 | # If this is a decimal, add to the number list. | |
97 | elif (i + 1 < input_len and char == '.' and x[i+1] in all_digits): | |
98 | if nums and '.' in nums: | |
99 | full_list.append(float(''.join(nums))) | |
100 | nums = [] | |
101 | nums.append(char) | |
102 | if strings: | |
103 | full_list.append(''.join(strings)) | |
104 | strings = [] | |
105 | # If this is an exponent, add to the number list. | |
106 | elif (i > 0 and i + 1 < input_len and exp and char in 'eE' and | |
107 | x[i-1] in all_digits and x[i+1] in all_digits | set('+-')): | |
108 | if 'e' in nums or 'E' in nums: | |
109 | strings = [char] | |
110 | full_list.append(float(''.join(nums))) | |
111 | nums = [] | |
195 | yield [val] | |
196 | ||
197 | # Recursively parse the decimal and after. If the returned | |
198 | # value is a list, add the list to the current number. | |
199 | # If not, just return the number with the decimal. | |
200 | else: | |
201 | # If the first value returned from the try_to_read_float | |
202 | # is a list, add it to the float component list. | |
203 | next(iterable) # To progress the iterator. | |
204 | ret = next(try_to_read_float(iterable, next_isnum, next_val)) | |
205 | if isinstance(ret, list): | |
206 | yield [val] + ret | |
112 | 207 | else: |
113 | nums.append(char) | |
114 | # If this is a unicode digit, append directly to the full list. | |
115 | elif unicodedata.numeric(char, None) is not None: | |
116 | # Reset any string or number. | |
117 | if strings: | |
118 | full_list.append(''.join(strings)) | |
119 | if nums: | |
120 | full_list.append(float(''.join(nums))) | |
121 | strings = [] | |
122 | nums = [] | |
123 | full_list.append(unicodedata.numeric(char)) | |
124 | # Otherwise add to the string. | |
125 | else: | |
126 | strings.append(char) | |
127 | # Reset any number. | |
128 | if nums: | |
129 | full_list.append(float(''.join(nums))) | |
130 | nums = [] | |
131 | if nums: | |
132 | full_list.append(float(''.join(nums))) | |
133 | elif strings: | |
134 | full_list.append(''.join(strings)) | |
135 | # Fix a float that looks like a string. | |
136 | fstrings = ('inf', 'infinity', '-inf', '-infinity', | |
137 | '+inf', '+infinity', 'nan') | |
138 | full_list = [float(y) if type(y) != float and y.lower() in fstrings else y | |
139 | for y in full_list] | |
140 | if safe: | |
141 | full_list = sep_inserter(full_list, (float,), sep) | |
142 | if type(full_list[0]) == float: | |
143 | return [sep] + full_list | |
144 | else: | |
145 | return full_list | |
146 | ||
147 | ||
148 | def sep_inserter(x, t, sep): | |
149 | # Simulates the py3_safe function. | |
150 | ret = [x[0]] | |
151 | for i, y in enumerate(x[1:]): | |
152 | if type(y) in t and type(x[i]) in t: | |
153 | ret.append(sep) | |
154 | ret.append(y) | |
208 | yield [val, next_val] | |
209 | ||
210 | ||
def try_to_read_float_with_exp(iterable, isnum, val):
    """
    Try to read a string that matches num.numE[+-]num and return as a float.
    Otherwise return the input as found.

    This is a generator that yields exactly one item: either whatever
    ``try_to_read_float`` produced, or that list extended with the exponent
    marker and the exponent digits.
    """
    # Parse the mantissa first.
    mantissa = next(try_to_read_float(iterable, isnum, val))

    # Look at (but do not consume) the element that follows it.
    marker_isnum, marker_val, marker_isuni = iterable.peek(triple_none)

    # Nothing more to do unless the mantissa came back as a list and an
    # exponent marker follows it.
    exponent_markers = ('e', 'E', 'e-', 'E-', 'e+', 'E+')
    if not isinstance(mantissa, list) or marker_val not in exponent_markers:
        yield mantissa
        return

    # Consume the marker, remembering it in case it must be put back.
    marker = SplitElement(marker_isnum, marker_val, marker_isuni)
    next(iterable)

    # The exponent is only valid when a non-unicode number comes next.
    digits_isnum, digits_val, digits_isuni = iterable.peek(triple_none)
    if digits_isnum and not digits_isuni:
        next(iterable)  # Consume the exponent digits.
        yield mantissa + [marker.val, digits_val]
    else:
        # Not an exponent after all; restore the marker for later parsing.
        iterable.push(marker)
        yield mantissa
243 | ||
244 | ||
def try_to_read_signed_float_template(iterable, isnum, val, key):
    """
    Try to read a string that matches [+-]num.numE[+-]num and return as a
    float. Otherwise return the input as found.

    This is a generator that yields exactly one item. ``iterable`` must
    support ``peek`` and ``push`` in addition to ``next``. ``key`` is called
    as ``key(iterable, isnum, val)`` and only its first produced item is
    used; in the signed branches that item is concatenated onto a list.
    """
    # Extract what is coming next.
    next_isnum, next_val, next_isuni = iterable.peek(triple_none)

    # If it looks like there is a sign here and the next value is a
    # non-unicode number, try to parse that with the sign.
    if val.endswith(('+', '-')) and next_isnum and not next_isuni:

        # If this value is a sign, return the combo.
        if val in ('+', '-'):
            next(iterable)  # To progress the iterator.
            yield [val] + next(key(iterable, next_isnum, next_val))

        # If the val ends with the sign, split the sign off the end of
        # the string then place it to the front of the iterable so that
        # we can use it later.
        else:
            iterable.push(SplitElement(False, val[-1], False))
            yield val[:-1]

    # If it looks like there is a sign followed by a decimal point here and
    # the next value is a non-unicode number, try to parse as a decimal.
    elif val.endswith(('+.', '-.')) and next_isnum and not next_isuni:

        # If this value is just a sign plus the decimal point, hand the
        # decimal point to the key function and prepend the sign.
        if val[:-1] in ('+', '-'):
            yield [val[:-1]] + next(key(iterable, False, val[-1]))

        # Otherwise split the sign and decimal point off the end of the
        # string then place them at the front of the iterable so that
        # we can use them later.
        else:
            iterable.push(SplitElement(False, val[-2:], False))
            yield val[:-2]

    # If no sign, pass directly to the key function.
    else:
        yield next(key(iterable, isnum, val))
290 | ||
291 | ||
def add_leading_space_if_first_is_num(iterable, sep):
    """Return the elements of ``iterable``, preceded by ``sep`` if the first is a number.

    The first element counts as a number when its exact type is ``int``,
    ``long`` or ``float``. An empty iterable is returned without ``sep``.
    """
    stream, lookahead = itertools.tee(iterable)
    first_type = type(next(lookahead, None))
    # Drop the look-ahead copy so tee does not keep buffering for it.
    del lookahead
    if first_type not in (int, long, float):
        return stream
    return itertools.chain([sep], stream)
299 | ||
300 | ||
def sep_inserter(iterable, sep, types=frozenset((int, long, float))):
    """Yield the elements of ``iterable`` with ``sep`` between adjacent numbers.

    Two neighbours are separated when the exact type of both is in
    ``types``. Simulates the py3_safe function.
    """
    pair_iter = pairwise(iterable)

    # The first pair is special because both of its members must be emitted.
    try:
        head, follower = next(pair_iter)
    except StopIteration:
        return  # Nothing to emit for an empty input.

    yield head
    # pairwise() reports a single-element input as (element, None).
    if follower is not None:
        if type(head) in types and type(follower) in types:
            yield sep
        yield follower

    # For every later pair only the right-hand member is new.
    for left, right in pair_iter:
        if type(left) in types and type(right) in types:
            yield sep
        yield right
326 | ||
def pairwise(iterable):
    """s -> (s0,s1), (s1,s2), (s2,s3), ...

    Special cases:

    * an empty input produces no pairs;
    * a single-element input produces exactly one pair, ``(s0, None)``,
      so that callers always see the lone element.

    The first two elements are read eagerly to tell these cases apart. A
    private marker object is used to detect exhaustion, so ``None`` values
    inside ``iterable`` are treated like any other element.
    """
    missing = object()
    source = iter(iterable)

    first = next(source, missing)
    if first is missing:
        return py23_zip((), ())  # No elements, so no pairs.

    second = next(source, missing)
    if second is missing:
        return py23_zip([first], [None])  # Return at least one value.

    # Re-attach the two elements read above, then pair each element with
    # its successor by advancing the second copy one step.
    a, b = itertools.tee(itertools.chain([first, second], source))
    next(b, None)
    return py23_zip(a, b)
341 | ||
342 | ||
class peekable(object):
    """Iterator wrapper that supports looking one item ahead.

    ``peek()`` returns the value that the following ``next()`` call will
    produce, without consuming it:

        >>> p = peekable(xrange(2))
        >>> p.peek()
        0
        >>> p.next()
        0
        >>> p.peek()
        1
        >>> p.next()
        1

    When the underlying iterator is exhausted, ``peek()`` returns the
    default that was passed to it:

        >>> p = peekable([])
        >>> p.peek('hi')
        'hi'

    Without a default, ``peek()`` raises ``StopIteration`` instead.

    The truth value of a peekable tells whether any items remain:

        >>> assert peekable(xrange(1))
        >>> assert not peekable([])

    """
    # The name is lowercase to sit alongside itertools; being a class is an
    # implementation detail.

    def __init__(self, iterable):
        self._it = iter(iterable)

    def __iter__(self):
        return self

    def __nonzero__(self):
        # A fresh marker can never be an element of the wrapped iterator,
        # so getting it back means there was nothing left to peek at.
        exhausted = object()
        return self.peek(exhausted) is not exhausted

    __bool__ = __nonzero__

    def peek(self, default=_sentinel):
        """Return the item the next call to ``next()`` will return.

        If nothing is left, return ``default``; if no ``default`` was
        given, raise ``StopIteration``.
        """
        # A previously peeked item is cached on the instance as ``_peek``.
        try:
            return self._peek
        except AttributeError:
            pass
        try:
            self._peek = next(self._it)
        except StopIteration:
            if default is _sentinel:
                raise
            return default
        return self._peek

    def next(self):
        item = self.peek()
        # Forget the cached look-ahead so the following call advances.
        if hasattr(self, '_peek'):
            del self._peek
        return item

    __next__ = next

    def push(self, value):
        """Put an element at the front of the iterable."""
        front = [value]
        # A cached look-ahead has to come right after the pushed value.
        if hasattr(self, '_peek'):
            front.append(self._peek)
            del self._peek
        self._it = itertools.chain(front, self._it)
0 | # -*- coding: utf-8 -*- | |
1 | """\ | |
2 | This file contains functions to stress-test natsort, looking | |
3 | for cases that raise an unknown exception. | |
4 | """ | |
5 | from random import randint, sample, choice | |
6 | from string import printable | |
7 | from copy import copy | |
8 | from pytest import fail | |
9 | from natsort import natsorted | |
10 | from natsort.compat.py23 import py23_range | |
11 | ||
12 | ||
def test_random():
    """Try to sort 100,000 randomly generated strings without exception."""
    template = ("Ended with exception type '{exc}: {msg}'.\n"
                "Failed on the input {lst}.")

    # Repeat the test 100,000 times.
    for _ in py23_range(100000):
        # Build a list of five random strings, each 7 to 30 characters long.
        lst = []
        for __ in py23_range(5):
            length = randint(7, 30)
            lst.append(''.join(sample(printable, length)))

        # Sorting must not raise; if it does, report the offending input.
        try:
            natsorted(lst)
        except Exception as err:
            fail(template.format(exc=type(err).__name__,
                                 msg=str(err),
                                 lst=str(lst)))
28 | ||
29 | ||
def test_similar():
    """Try to sort 100,000 randomly generated
    similar strings without exception.
    """
    template = ("Ended with exception type '{exc}: {msg}'.\n"
                "Failed on the input {lst}.")

    # Repeat the test 100,000 times.
    for _ in py23_range(100000):
        # Characters of a random base string, 7 to 30 characters long.
        base = sample(printable, randint(7, 30))

        # Derive five variants, each with 1 to 5 random replacements.
        lst = []
        for __ in py23_range(5):
            new_str = copy(base)
            for ___ in py23_range(randint(1, 5)):
                # The replacement is drawn before the position so the random
                # draws happen in the same order as in a single assignment.
                replacement = choice(printable)
                position = randint(0, len(base) - 1)
                new_str[position] = replacement
            lst.append(''.join(new_str))

        # Sorting must not raise; if it does, report the offending input.
        try:
            natsorted(lst)
        except Exception as err:
            fail(template.format(exc=type(err).__name__,
                                 msg=str(err),
                                 lst=str(lst)))
3 | 3 | """ |
4 | 4 | from __future__ import unicode_literals |
5 | 5 | |
6 | import pytest | |
7 | 6 | import unicodedata |
8 | 7 | from math import isnan |
9 | from natsort.compat.py23 import py23_str | |
8 | from natsort.compat.py23 import PY_VERSION | |
10 | 9 | from natsort.compat.fake_fastnumbers import ( |
11 | 10 | fast_float, |
12 | 11 | fast_int, |
13 | isfloat, | |
14 | isint, | |
15 | 12 | ) |
16 | from compat.hypothesis import ( | |
17 | assume, | |
13 | from hypothesis import ( | |
18 | 14 | given, |
19 | use_hypothesis, | |
20 | 15 | ) |
16 | from hypothesis.strategies import ( | |
17 | floats, | |
18 | integers, | |
19 | text, | |
20 | ) | |
21 | ||
22 | if PY_VERSION >= 3: | |
23 | long = int | |
21 | 24 | |
22 | 25 | |
23 | 26 | def is_float(x): |
34 | 37 | return True |
35 | 38 | |
36 | 39 | |
40 | def not_a_float(x): | |
41 | return not is_float(x) | |
42 | ||
43 | ||
37 | 44 | def is_int(x): |
38 | 45 | try: |
39 | int(x) | |
40 | except ValueError: | |
46 | return x.is_integer() | |
47 | except AttributeError: | |
41 | 48 | try: |
42 | unicodedata.digit(x) | |
43 | except (ValueError, TypeError): | |
44 | return False | |
49 | long(x) | |
50 | except ValueError: | |
51 | try: | |
52 | unicodedata.digit(x) | |
53 | except (ValueError, TypeError): | |
54 | return False | |
55 | else: | |
56 | return True | |
45 | 57 | else: |
46 | 58 | return True |
47 | else: | |
48 | return True | |
59 | ||
60 | ||
61 | def not_an_int(x): | |
62 | return not is_int(x) | |
63 | ||
49 | 64 | |
50 | 65 | # Each test has an "example" version for demonstrative purposes, |
51 | 66 | # and a test that uses the hypothesis module. |
52 | 67 | |
53 | 68 | |
69 | def test_fast_float_returns_nan_alternate_if_nan_option_is_given(): | |
70 | assert fast_float('nan', nan=7) == 7 | |
71 | ||
72 | ||
54 | 73 | def test_fast_float_converts_float_string_to_float_example(): |
55 | 74 | assert fast_float('45.8') == 45.8 |
56 | 75 | assert fast_float('-45') == -45.0 |
57 | assert fast_float('45.8e-2') == 45.8e-2 | |
76 | assert fast_float('45.8e-2', key=len) == 45.8e-2 | |
58 | 77 | assert isnan(fast_float('nan')) |
78 | assert isnan(fast_float('+nan')) | |
79 | assert isnan(fast_float('-NaN')) | |
80 | assert fast_float('۱۲.۱۲') == 12.12 | |
81 | assert fast_float('-۱۲.۱۲') == -12.12 | |
59 | 82 | |
60 | 83 | |
61 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
62 | @given(float) | |
84 | @given(floats(allow_nan=False)) | |
63 | 85 | def test_fast_float_converts_float_string_to_float(x): |
64 | assume(not isnan(x)) # But inf is included | |
65 | 86 | assert fast_float(repr(x)) == x |
66 | 87 | |
67 | 88 | |
69 | 90 | assert fast_float('invalid') == 'invalid' |
70 | 91 | |
71 | 92 | |
72 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
73 | @given(py23_str) | |
93 | @given(text().filter(not_a_float).filter(bool)) | |
74 | 94 | def test_fast_float_leaves_string_as_is(x): |
75 | assume(not is_float(x)) | |
76 | 95 | assert fast_float(x) == x |
96 | ||
97 | ||
98 | def test_fast_float_with_key_applies_to_string_example(): | |
99 | assert fast_float('invalid', key=len) == len('invalid') | |
100 | ||
101 | ||
102 | @given(text().filter(not_a_float).filter(bool)) | |
103 | def test_fast_float_with_key_applies_to_string(x): | |
104 | assert fast_float(x, key=len) == len(x) | |
77 | 105 | |
78 | 106 | |
79 | 107 | def test_fast_int_leaves_float_string_as_is_example(): |
82 | 110 | assert fast_int('inf') == 'inf' |
83 | 111 | |
84 | 112 | |
85 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
86 | @given(float) | |
113 | @given(floats().filter(not_an_int)) | |
87 | 114 | def test_fast_int_leaves_float_string_as_is(x): |
88 | assume(not x.is_integer()) | |
89 | 115 | assert fast_int(repr(x)) == repr(x) |
90 | 116 | |
91 | 117 | |
92 | 118 | def test_fast_int_converts_int_string_to_int_example(): |
93 | 119 | assert fast_int('-45') == -45 |
94 | 120 | assert fast_int('+45') == 45 |
121 | assert fast_int('۱۲') == 12 | |
122 | assert fast_int('-۱۲') == -12 | |
95 | 123 | |
96 | 124 | |
97 | @given(int) | |
125 | @given(integers()) | |
98 | 126 | def test_fast_int_converts_int_string_to_int(x): |
99 | 127 | assert fast_int(repr(x)) == x |
100 | 128 | |
103 | 131 | assert fast_int('invalid') == 'invalid' |
104 | 132 | |
105 | 133 | |
106 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
107 | @given(py23_str) | |
134 | @given(text().filter(not_an_int).filter(bool)) | |
108 | 135 | def test_fast_int_leaves_string_as_is(x): |
109 | assume(not is_int(x)) | |
110 | 136 | assert fast_int(x) == x |
111 | 137 | |
112 | 138 | |
113 | def test_isfloat_returns_True_for_real_numbers_example(): | |
114 | assert isfloat(-45.0) | |
115 | assert isfloat(45.8e-2) | |
139 | def test_fast_int_with_key_applies_to_string_example(): | |
140 | assert fast_int('invalid', key=len) == len('invalid') | |
116 | 141 | |
117 | 142 | |
118 | @given(float) | |
119 | def test_isfloat_returns_True_for_real_numbers(x): | |
120 | assert isfloat(x) | |
121 | ||
122 | ||
123 | def test_isfloat_returns_False_for_strings_example(): | |
124 | assert not isfloat('45.8') | |
125 | assert not isfloat('invalid') | |
126 | ||
127 | ||
128 | @given(py23_str) | |
129 | def test_isfloat_returns_False_for_strings(x): | |
130 | assert not isfloat(x) | |
131 | ||
132 | ||
133 | def test_isint_returns_True_for_real_numbers_example(): | |
134 | assert isint(-45) | |
135 | assert isint(45) | |
136 | ||
137 | ||
138 | @given(int) | |
139 | def test_isint_returns_True_for_real_numbers(x): | |
140 | assert isint(x) | |
141 | ||
142 | ||
143 | def test_isint_returns_False_for_strings_example(): | |
144 | assert not isint('45') | |
145 | assert not isint('invalid') | |
146 | ||
147 | ||
148 | @given(py23_str) | |
149 | def test_isint_returns_False_for_strings(x): | |
150 | assert not isint(x) | |
143 | @given(text().filter(not_an_int).filter(bool)) | |
144 | def test_fast_int_with_key_applies_to_string(x): | |
145 | assert fast_int(x, key=len) == len(x) |
0 | # -*- coding: utf-8 -*- | |
1 | """These test the utils.py functions.""" | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from natsort.ns_enum import ns | |
5 | from natsort.utils import _final_data_transform_factory | |
6 | from natsort.compat.py23 import py23_str | |
7 | from hypothesis import ( | |
8 | given, | |
9 | ) | |
10 | from hypothesis.strategies import ( | |
11 | text, | |
12 | floats, | |
13 | integers, | |
14 | ) | |
15 | ||
16 | ||
17 | # Each test has an "example" version for demonstrative purposes, | |
18 | # and a test that uses the hypothesis module. | |
19 | ||
20 | ||
def test_final_data_transform_factory_with_iterable_returns_tuple_with_no_options_example():
    """With no algorithm flags, an iterator input comes back as a tuple."""
    transform = _final_data_transform_factory(0, '', '')
    assert transform(iter([7]), '') == (7,)
23 | ||
24 | ||
@given(text())
def test_final_data_transform_factory_with_iterable_returns_tuple_with_no_options(x):
    """With no algorithm flags, any one-element iterator comes back as a tuple."""
    def run(alg):
        # Build a fresh transform and a fresh iterator for every evaluation.
        return _final_data_transform_factory(alg, '', '')(iter([x]), '')

    assert run(0) == (x,)
    # UNGROUPLETTERS without LOCALE does nothing, as does LOCALE without UNGROUPLETTERS
    assert run(ns.UNGROUPLETTERS) == run(0)
    assert run(ns.LOCALE) == run(0)
31 | ||
32 | ||
def test_final_data_transform_factory_with_empty_tuple_returns_double_empty_tuple():
    """With LOCALE and UNGROUPLETTERS, an empty tuple becomes a pair of empty tuples."""
    alg = ns.LOCALE | ns.UNGROUPLETTERS
    transform = _final_data_transform_factory(alg, '', '')
    assert transform((), '') == ((), ())
35 | ||
36 | ||
def test_final_data_transform_factory_with_null_string_first_element_adds_empty_string_on_first_tuple_element():
    """An empty first element makes the factory's third argument the leading tuple."""
    alg = ns.LOCALE | ns.UNGROUPLETTERS
    transform = _final_data_transform_factory(alg, '', 'xx')
    assert transform(('', 60), '') == (('xx',), ('', 60))
39 | ||
40 | ||
def test_final_data_transform_factory_returns_first_element_in_first_tuple_element_example():
    """The first character of the first element leads the result."""
    alg = ns.LOCALE | ns.UNGROUPLETTERS
    transform = _final_data_transform_factory(alg, '', '')
    assert transform(('this', 60), 'this60') == (('t',), ('this', 60))
43 | ||
44 | ||
@given(x=text().filter(bool), y=floats(allow_nan=False, allow_infinity=False) | integers())
def test_final_data_transform_factory_returns_first_element_in_first_tuple_element(x, y):
    """The first character of the first element leads the result."""
    alg = ns.LOCALE | ns.UNGROUPLETTERS
    transform = _final_data_transform_factory(alg, '', '')
    original = ''.join([py23_str(part) for part in (x, y)])
    assert transform((x, y), original) == ((x[0],), (x, y))
48 | ||
49 | ||
def test_final_data_transform_factory_returns_first_element_in_first_tuple_element_caseswapped_with_DUMB_and_LOWERCASEFIRST_example():
    """With _DUMB and LOWERCASEFIRST added, the leading character is case-swapped."""
    alg = ns.LOCALE | ns.UNGROUPLETTERS | ns._DUMB | ns.LOWERCASEFIRST
    transform = _final_data_transform_factory(alg, '', '')
    assert transform(('this', 60), 'this60') == (('T',), ('this', 60))
52 | ||
53 | ||
@given(x=text().filter(bool), y=floats(allow_nan=False, allow_infinity=False) | integers())
def test_final_data_transform_factory_returns_first_element_in_first_tuple_element_caseswapped_with_DUMB_and_LOWERCASEFIRST(x, y):
    """With _DUMB and LOWERCASEFIRST added, the leading character is case-swapped."""
    alg = ns.LOCALE | ns.UNGROUPLETTERS | ns._DUMB | ns.LOWERCASEFIRST
    transform = _final_data_transform_factory(alg, '', '')
    original = ''.join([py23_str(part) for part in (x, y)])
    assert transform((x, y), original) == ((x[0].swapcase(),), (x, y))
0 | # -*- coding: utf-8 -*- | |
1 | """These test the utils.py functions.""" | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import pytest | |
5 | import locale | |
6 | from operator import methodcaller | |
7 | from natsort.ns_enum import ns | |
8 | from natsort.utils import _input_string_transform_factory | |
9 | from natsort.compat.py23 import NEWPY | |
10 | from compat.locale import ( | |
11 | load_locale, | |
12 | has_locale_de_DE, | |
13 | ) | |
14 | from hypothesis import ( | |
15 | given, | |
16 | ) | |
17 | from hypothesis.strategies import ( | |
18 | text, | |
19 | integers, | |
20 | lists, | |
21 | ) | |
22 | ||
23 | ||
24 | # Each test has an "example" version for demonstrative purposes, | |
25 | # and a test that uses the hypothesis module. | |
26 | ||
27 | ||
def test_input_string_transform_factory_is_no_op_for_no_alg_options_examples():
    """With no algorithm flags the very same string object is returned."""
    original = 'feijGGAd'
    transform = _input_string_transform_factory(0)
    assert transform(original) is original
31 | ||
32 | ||
@given(text())
def test_input_string_transform_factory_is_no_op_for_no_alg_options(x):
    """With no algorithm flags the very same string object is returned."""
    transform = _input_string_transform_factory(0)
    assert transform(x) is x
36 | ||
37 | ||
def test_input_string_transform_factory_performs_casefold_with_IGNORECASE_examples():
    """IGNORECASE case-folds the input (lower-cases it when NEWPY is false)."""
    x = 'feijGGAd'
    transform = _input_string_transform_factory(ns.IGNORECASE)
    expected = x.casefold() if NEWPY else x.lower()
    assert transform(x) == expected
44 | ||
45 | ||
@given(text())
def test_input_string_transform_factory_performs_casefold_with_IGNORECASE(x):
    """IGNORECASE case-folds the input (lower-cases it when NEWPY is false)."""
    transform = _input_string_transform_factory(ns.IGNORECASE)
    expected = x.casefold() if NEWPY else x.lower()
    assert transform(x) == expected
52 | ||
53 | ||
def test_input_string_transform_factory_performs_swapcase_with_DUMB_examples():
    """_DUMB swaps the case of every character."""
    x = 'feijGGAd'
    transform = _input_string_transform_factory(ns._DUMB)
    assert transform(x) == x.swapcase()
57 | ||
58 | ||
@given(text())
def test_input_string_transform_factory_performs_swapcase_with_DUMB(x):
    """_DUMB swaps the case of every character."""
    transform = _input_string_transform_factory(ns._DUMB)
    assert transform(x) == x.swapcase()
62 | ||
63 | ||
def test_input_string_transform_factory_performs_swapcase_with_LOWERCASEFIRST_example():
    """LOWERCASEFIRST swaps the case of every character."""
    x = 'feijGGAd'
    transform = _input_string_transform_factory(ns.LOWERCASEFIRST)
    assert transform(x) == x.swapcase()
67 | ||
68 | ||
@given(text())
def test_input_string_transform_factory_performs_swapcase_with_LOWERCASEFIRST(x):
    """LOWERCASEFIRST swaps the case of every character, for any text."""
    # Use the generated ``x`` directly; overwriting it with a fixed string
    # would reduce this property test to a single example.
    assert _input_string_transform_factory(ns.LOWERCASEFIRST)(x) == x.swapcase()
73 | ||
74 | ||
def test_input_string_transform_factory_is_no_op_with_both_LOWERCASEFIRST_AND_DUMB_example():
    """_DUMB together with LOWERCASEFIRST returns the very same string object."""
    original = 'feijGGAd'
    transform = _input_string_transform_factory(ns._DUMB | ns.LOWERCASEFIRST)
    assert transform(original) is original
78 | ||
79 | ||
@given(text())
def test_input_string_transform_factory_is_no_op_with_both_LOWERCASEFIRST_AND_DUMB(x):
    """_DUMB together with LOWERCASEFIRST returns the very same string object."""
    transform = _input_string_transform_factory(ns._DUMB | ns.LOWERCASEFIRST)
    assert transform(x) is x
83 | ||
84 | ||
def test_input_string_transform_factory_performs_swapcase_and_casefold_both_LOWERCASEFIRST_AND_IGNORECASE_example():
    """IGNORECASE with LOWERCASEFIRST swaps case, then case-folds (or lower-cases)."""
    x = 'feijGGAd'
    transform = _input_string_transform_factory(ns.IGNORECASE | ns.LOWERCASEFIRST)
    swapped = x.swapcase()
    expected = swapped.casefold() if NEWPY else swapped.lower()
    assert transform(x) == expected
91 | ||
92 | ||
@given(text())
def test_input_string_transform_factory_performs_swapcase_and_casefold_both_LOWERCASEFIRST_AND_IGNORECASE(x):
    """IGNORECASE with LOWERCASEFIRST swaps case, then case-folds (or lower-cases)."""
    transform = _input_string_transform_factory(ns.IGNORECASE | ns.LOWERCASEFIRST)
    swapped = x.swapcase()
    expected = swapped.casefold() if NEWPY else swapped.lower()
    assert transform(x) == expected
99 | ||
100 | ||
def test_input_string_transform_factory_removes_thousands_separator_with_LOCALE_example():
    """LOCALE strips valid thousands separators; LOCALEALPHA leaves them alone."""
    load_locale('en_US')
    try:
        x = '12,543,642,642.534,534,980'  # Without FLOAT it does not account for decimal.
        assert _input_string_transform_factory(ns.LOCALE)(x) == '12543642642.534534980'
        x = '12,543,642,642.534,534,980'  # LOCALEALPHA doesn't do anything... need LOCALENUM
        assert _input_string_transform_factory(ns.LOCALEALPHA)(x) == '12,543,642,642.534,534,980'
    finally:
        # Reset the locale even when an assertion fails, so a failure here
        # cannot change the locale seen by later tests.
        locale.setlocale(locale.LC_ALL, str(''))
108 | ||
109 | ||
@given(lists(elements=integers(), min_size=4, max_size=20))
def test_input_string_transform_factory_removes_thousands_separator_with_LOCALE(x):
    """LOCALE strips the commas from a digit string grouped in threes."""
    load_locale('en_US')
    try:
        # Concatenate the absolute values into one digit string.
        t = ''.join(map(methodcaller('rstrip', 'lL'), map(str, map(abs, x))))  # Remove negative signs trailing L
        # Rebuild the string from the right, adding a comma every three digits.
        s = ''
        for i, y in enumerate(reversed(t), 1):
            s = y + s
            if i % 3 == 0 and i != len(t):
                s = ',' + s
        assert _input_string_transform_factory(ns.LOCALE)(s) == t
    finally:
        # Reset the locale even when an assertion fails, so a failure here
        # cannot change the locale seen by later tests.
        locale.setlocale(locale.LC_ALL, str(''))
121 | ||
122 | ||
def test_input_string_transform_factory_removes_thousands_separator_and_is_float_aware_with_LOCALE_and_FLOAT_example():
    """With LOCALE | FLOAT, the first comma after the decimal point is kept."""
    # Load en_US explicitly, as the hypothesis version of this test does,
    # so the expected separators do not depend on the ambient locale.
    load_locale('en_US')
    try:
        x = '12,543,642,642.534,534,980'
        assert _input_string_transform_factory(ns.LOCALE | ns.FLOAT)(x) == '12543642642.534,534980'
    finally:
        locale.setlocale(locale.LC_ALL, str(''))
126 | ||
127 | ||
@given(lists(elements=integers(), min_size=4, max_size=20), lists(elements=integers(), min_size=4, max_size=20))
def test_input_string_transform_factory_removes_thousands_separator_and_is_float_aware_with_LOCALE_and_FLOAT(x, y):
    """Check comma removal on both sides of a decimal point, with and without FLOAT."""
    load_locale('en_US')
    try:
        # Integer part: digit string ``t`` and its comma-grouped form ``s``.
        t = ''.join(map(methodcaller('rstrip', 'lL'), map(str, map(abs, x))))  # Remove negative signs trailing L
        s = ''
        for i, z in enumerate(reversed(t), 1):
            s = z + s
            if i % 3 == 0 and i != len(t):
                s = ',' + s
        # Fractional part: digit string ``u`` and its comma-grouped form ``v``.
        u = ''.join(map(methodcaller('rstrip', 'lL'), map(str, map(abs, y))))  # Remove negative signs trailing L
        v = ''
        for i, z in enumerate(reversed(u), 1):
            v = z + v
            if i % 3 == 0 and i != len(u):
                v = ',' + v
        # Remove all but first comma.
        a = v.split(',', 1)
        p = a[0] + ',' + a[1].replace(',', '')
        assert _input_string_transform_factory(ns.LOCALE)('.'.join([s, v])) == '.'.join([t, u])
        assert _input_string_transform_factory(ns.LOCALE | ns.FLOAT)('.'.join([s, v])) == '.'.join([t, p])
    finally:
        # Reset the locale even when an assertion fails, so a failure here
        # cannot change the locale seen by later tests.
        locale.setlocale(locale.LC_ALL, str(''))
149 | ||
150 | ||
151 | # These might be too much to test with hypothesis. | |
152 | ||
153 | ||
def test_input_string_transform_factory_leaves_invalid_thousands_separator_with_LOCALE_example():
    """Commas that are not valid thousands separators are left in place."""
    load_locale('en_US')
    try:
        x = '12,543,642642.5345,34980'
        assert _input_string_transform_factory(ns.LOCALE)(x) == '12543,642642.5345,34980'
        x = '12,59443,642,642.53,4534980'
        assert _input_string_transform_factory(ns.LOCALE)(x) == '12,59443,642642.53,4534980'
        x = '12543,642,642.5,34534980'
        assert _input_string_transform_factory(ns.LOCALE)(x) == '12543,642642.5,34534980'
    finally:
        # Reset the locale even when an assertion fails, so a failure here
        # cannot change the locale seen by later tests.
        locale.setlocale(locale.LC_ALL, str(''))
163 | ||
164 | ||
165 | # @pytest.mark.skipif(not has_locale_de_DE or dumb_sort(), reason='requires de_DE locale and working locale') | |
@pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale and working locale')
def test_input_string_transform_factory_replaces_decimal_separator_with_LOCALE_example():
    """Under de_DE, the decimal comma becomes a point only when FLOAT is given."""
    load_locale('de_DE')
    try:
        x = '1543,753'
        assert _input_string_transform_factory(ns.LOCALE)(x) == '1543,753'  # Does nothing without FLOAT
        assert _input_string_transform_factory(ns.LOCALE | ns.FLOAT)(x) == '1543.753'
        assert _input_string_transform_factory(ns.LOCALEALPHA)(x) == '1543,753'  # LOCALEALPHA doesn't do anything... need LOCALENUM
    finally:
        # Reset the locale even when an assertion fails, so a failure here
        # cannot change the locale seen by later tests.
        locale.setlocale(locale.LC_ALL, str(''))
174 | ||
175 | ||
176 | # @pytest.mark.skipif(not has_locale_de_DE or dumb_sort(), reason='requires de_DE locale and working locale') | |
@pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale and working locale')
def test_input_string_transform_factory_does_not_replace_invalid_decimal_separator_with_LOCALE_example():
    """Under de_DE, a comma that is not between digits is left untouched."""
    load_locale('de_DE')
    try:
        x = '154s,t53'
        assert _input_string_transform_factory(ns.LOCALE | ns.FLOAT)(x) == '154s,t53'
    finally:
        # Reset the locale even when an assertion fails, so a failure here
        # cannot change the locale seen by later tests.
        locale.setlocale(locale.LC_ALL, str(''))
0 | # -*- coding: utf-8 -*- | |
1 | """\ | |
2 | Test the locale help module. | |
3 | """ | |
4 | from __future__ import unicode_literals | |
5 | ||
6 | import locale | |
7 | import pytest | |
8 | from math import isnan | |
9 | from itertools import chain | |
10 | from natsort.compat.fake_fastnumbers import fast_float, isfloat | |
11 | from natsort.locale_help import grouper, locale_convert | |
12 | from natsort.compat.py23 import py23_str | |
13 | from natsort.compat.locale import use_pyicu | |
14 | from compat.locale import ( | |
15 | load_locale, | |
16 | has_locale_de_DE, | |
17 | get_strxfrm, | |
18 | low, | |
19 | ) | |
20 | from compat.hypothesis import ( | |
21 | assume, | |
22 | given, | |
23 | use_hypothesis, | |
24 | ) | |
25 | ||
26 | ||
27 | # Each test has an "example" version for demonstrative purposes, | |
28 | # and a test that uses the hypothesis module. | |
29 | ||
30 | ||
def test_grouper_returns_letters_with_lowercase_transform_of_letter_example():
    """Each letter comes back preceded by its lowercase form."""
    funcs = (fast_float, isfloat)
    cases = (
        ('HELLO', 'hHeElLlLoO'),
        ('hello', 'hheelllloo'),
    )
    for given_str, expected in cases:
        assert grouper(given_str, funcs) == expected
34 | ||
35 | ||
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
@given(py23_str)
def test_grouper_returns_letters_with_lowercase_transform_of_letter(x):
    """Each character comes back preceded by its ``low()`` form."""
    # Only strings that fast_float leaves unconverted are relevant here.
    assume(type(fast_float(x)) is not float)
    result = grouper(x, (fast_float, isfloat))
    pieces = []
    for char in x:
        pieces.extend([low(char), char])
    assert result == ''.join(pieces)
41 | ||
42 | ||
def test_grouper_returns_float_string_as_float_example():
    """A string holding a float is returned as that float."""
    funcs = (fast_float, isfloat)
    assert grouper('45.8e-2', funcs) == 45.8e-2
45 | ||
46 | ||
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
@given(float)
def test_grouper_returns_float_string_as_float(x):
    """The repr of any non-NaN float is returned as that float."""
    # NaN never compares equal to itself, so it cannot be checked with ==.
    assume(not isnan(x))
    funcs = (fast_float, isfloat)
    assert grouper(repr(x), funcs) == x
52 | ||
53 | ||
def test_locale_convert_transforms_float_string_to_float_example():
    """Under en_US, a float string is converted to a float."""
    load_locale('en_US')
    try:
        assert locale_convert('45.8', (fast_float, isfloat), False) == 45.8
    finally:
        # Reset the locale even when the assertion fails, so a failure here
        # cannot change the locale seen by later tests.
        locale.setlocale(locale.LC_NUMERIC, str(''))
58 | ||
59 | ||
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
@given(float)
def test_locale_convert_transforms_float_string_to_float(x):
    """Under en_US, the repr of any non-NaN float is converted to that float."""
    # NaN never compares equal to itself, so it cannot be checked with ==.
    assume(not isnan(x))
    load_locale('en_US')
    try:
        assert locale_convert(repr(x), (fast_float, isfloat), False) == x
    finally:
        # Reset the locale even when the assertion fails, so a failure here
        # cannot change the locale seen by later tests.
        locale.setlocale(locale.LC_NUMERIC, str(''))
67 | ||
68 | ||
def test_locale_convert_transforms_nonfloat_string_to_strxfrm_string_example():
    """Under en_US, a non-float string is passed through strxfrm."""
    load_locale('en_US')
    try:
        strxfrm = get_strxfrm()
        assert locale_convert('45,8', (fast_float, isfloat), False) == strxfrm('45,8')
        assert locale_convert('hello', (fast_float, isfloat), False) == strxfrm('hello')
    finally:
        # Reset the locale even when an assertion fails, so a failure here
        # cannot change the locale seen by later tests.
        locale.setlocale(locale.LC_NUMERIC, str(''))
75 | ||
76 | ||
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
@given(py23_str)
def test_locale_convert_transforms_nonfloat_string_to_strxfrm_string(x):
    """Under en_US, any non-float string is passed through strxfrm."""
    # Only strings that fast_float leaves unconverted are relevant here.
    assume(type(fast_float(x)) is not float)
    load_locale('en_US')
    try:
        strxfrm = get_strxfrm()
        assert locale_convert(x, (fast_float, isfloat), False) == strxfrm(x)
    finally:
        # Reset the locale even when the assertion fails, so a failure here
        # cannot change the locale seen by later tests.
        locale.setlocale(locale.LC_NUMERIC, str(''))
85 | ||
86 | ||
def test_locale_convert_with_groupletters_transforms_nonfloat_string_to_strxfrm_string_with_grouped_letters_example():
    """With grouping on, each character is doubled with its lowercase form before strxfrm."""
    load_locale('en_US')
    try:
        strxfrm = get_strxfrm()
        assert locale_convert('hello', (fast_float, isfloat), True) == strxfrm('hheelllloo')
        assert locale_convert('45,8', (fast_float, isfloat), True) == strxfrm('4455,,88')
    finally:
        # Reset the locale even when an assertion fails, so a failure here
        # cannot change the locale seen by later tests.
        locale.setlocale(locale.LC_NUMERIC, str(''))
93 | ||
94 | ||
@pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater')
@given(py23_str)
def test_locale_convert_with_groupletters_transforms_nonfloat_string_to_strxfrm_string_with_grouped_letters(x):
    """With grouping on, each character is paired with its ``low()`` form before strxfrm."""
    # Only strings that fast_float leaves unconverted are relevant here.
    assume(type(fast_float(x)) is not float)
    load_locale('en_US')
    try:
        strxfrm = get_strxfrm()
        assert locale_convert(x, (fast_float, isfloat), True) == strxfrm(''.join(chain.from_iterable([low(y), y] for y in x)))
    finally:
        # Reset the locale even when the assertion fails, so a failure here
        # cannot change the locale seen by later tests.
        locale.setlocale(locale.LC_NUMERIC, str(''))
103 | ||
104 | ||
@pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale')
def test_locale_convert_transforms_float_string_to_float_with_de_locale_example():
    """Under de_DE, both '.' and ',' are accepted as the decimal separator."""
    load_locale('de_DE')
    try:
        assert locale_convert('45.8', (fast_float, isfloat), False) == 45.8
        assert locale_convert('45,8', (fast_float, isfloat), False) == 45.8
    finally:
        # Reset the locale even when an assertion fails, so a failure here
        # cannot change the locale seen by later tests.
        locale.setlocale(locale.LC_NUMERIC, str(''))
111 | ||
112 | ||
113 | @pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale') | |
114 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
115 | @given(float) | |
116 | def test_locale_convert_transforms_float_string_to_float_with_de_locale(x): | |
117 | assume(not isnan(x)) | |
118 | load_locale('de_DE') | |
119 | assert locale_convert(repr(x), (fast_float, isfloat), False) == x | |
120 | assert locale_convert(repr(x).replace('.', ','), (fast_float, isfloat), False) == x | |
121 | locale.setlocale(locale.LC_NUMERIC, str('')) |
2 | 2 | Test the natsort command-line tool functions. |
3 | 3 | """ |
4 | 4 | from __future__ import print_function, unicode_literals |
5 | import pytest | |
6 | 5 | import re |
7 | 6 | import sys |
8 | 7 | from pytest import raises |
9 | 8 | from compat.mock import patch, call |
10 | from compat.hypothesis import ( | |
11 | assume, | |
9 | from hypothesis import ( | |
12 | 10 | given, |
13 | integers_from, | |
14 | integers_in_range, | |
15 | sampled_from, | |
16 | use_hypothesis, | |
11 | ) | |
12 | from hypothesis.strategies import ( | |
13 | integers, | |
14 | floats, | |
15 | lists, | |
16 | data, | |
17 | 17 | ) |
18 | 18 | from natsort.__main__ import ( |
19 | 19 | main, |
22 | 22 | keep_entry_range, |
23 | 23 | exclude_entry, |
24 | 24 | sort_and_print_entries, |
25 | py23_str, | |
26 | 25 | ) |
27 | 26 | |
28 | 27 | |
65 | 64 | |
66 | 65 | class Args: |
67 | 66 | """A dummy class to simulate the argparse Namespace object""" |
68 | def __init__(self, filter, reverse_filter, exclude, as_path, reverse): | |
69 | self.filter = filter | |
67 | def __init__(self, filt, reverse_filter, exclude, as_path, reverse): | |
68 | self.filter = filt | |
70 | 69 | self.reverse_filter = reverse_filter |
71 | 70 | self.exclude = exclude |
72 | 71 | self.reverse = reverse |
170 | 169 | assert range_check(6.4, 30) == (6.4, 30.0) |
171 | 170 | |
172 | 171 | |
173 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
174 | @given(x=int, y=int) | |
175 | def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second(x, y): | |
176 | assume(x < y) | |
177 | assert range_check(x, y) == (float(x), float(y)) | |
178 | ||
179 | ||
180 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
181 | @given(x=float, y=float) | |
182 | def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second2(x, y): | |
183 | assume(x < y) | |
172 | @given(x=integers(), data=data()) # Defer data selection for y till test is run. | |
173 | def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second(x, data): | |
174 | # Pull data such that the first is less than the second. | |
175 | y = data.draw(integers(min_value=x + 1)) | |
176 | assert range_check(x, y) == (x, y) | |
177 | ||
178 | ||
179 | @given(x=floats(allow_nan=False, min_value=-1E8, max_value=1E8), data=data()) # Defer data selection for y till test is run. | |
180 | def test_range_check_returns_range_as_is_but_with_floats_if_first_is_less_than_second2(x, data): | |
181 | # Pull data such that the first is less than the second. | |
182 | y = data.draw(floats(min_value=x + 1.0, max_value=1E9, allow_nan=False)) | |
184 | 183 | assert range_check(x, y) == (x, y) |
185 | 184 | |
186 | 185 | |
190 | 189 | assert str(err.value) == 'low >= high' |
191 | 190 | |
192 | 191 | |
193 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
194 | @given(x=float, y=float) | |
195 | def test_range_check_raises_ValueError_if_second_is_less_than_first(x, y): | |
196 | assume(x >= y) | |
197 | with raises(ValueError) as err: | |
198 | range_check(x, x) | |
192 | @given(x=floats(allow_nan=False), data=data()) # Defer data selection for y till test is run. | |
193 | def test_range_check_raises_ValueError_if_second_is_less_than_first(x, data): | |
194 | # Pull data such that the first is greater than or equal to the second. | |
195 | y = data.draw(floats(max_value=x, allow_nan=False)) | |
196 | with raises(ValueError) as err: | |
197 | range_check(x, y) | |
199 | 198 | assert str(err.value) == 'low >= high' |
200 | 199 | |
201 | 200 | |
205 | 204 | assert check_filter(None) is None |
206 | 205 | |
207 | 206 | |
208 | def test_check_filter_converts_filter_numbers_to_floats_if_filter_is_valid_example(): | |
209 | assert check_filter([(6, 7)]) == [(6.0, 7.0)] | |
210 | assert check_filter([(6, 7), (2, 8)]) == [(6.0, 7.0), (2.0, 8.0)] | |
211 | ||
212 | ||
213 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
214 | @given(x=(int, int, float, float), y=(int, float, float, int)) | |
215 | def test_check_filter_converts_filter_numbers_to_floats_if_filter_is_valid(x, y): | |
216 | assume(all(i < j for i, j in zip(x, y))) | |
217 | assert check_filter(list(zip(x, y))) == [(float(i), float(j)) for i, j in zip(x, y)] | |
207 | def test_check_filter_returns_input_as_is_if_filter_is_valid_example(): | |
208 | assert check_filter([(6, 7)]) == [(6, 7)] | |
209 | assert check_filter([(6, 7), (2, 8)]) == [(6, 7), (2, 8)] | |
210 | ||
211 | ||
212 | @given(x=lists(integers(), min_size=1), data=data()) # Defer data selection for y till test is run. | |
213 | def test_check_filter_returns_input_as_is_if_filter_is_valid(x, data): | |
214 | y = [data.draw(integers(min_value=val + 1)) for val in x] # ensure y is element-wise greater than x | |
215 | assert check_filter(list(zip(x, y))) == [(i, j) for i, j in zip(x, y)] | |
218 | 216 | |
219 | 217 | |
220 | 218 | def test_check_filter_raises_ValueError_if_filter_is_invalid_example(): |
223 | 221 | assert str(err.value) == 'Error in --filter: low >= high' |
224 | 222 | |
225 | 223 | |
226 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
227 | @given(x=(int, int, float, float), y=(int, float, float, int)) | |
228 | def test_check_filter_raises_ValueError_if_filter_is_invalid(x, y): | |
229 | assume(any(i >= j for i, j in zip(x, y))) | |
224 | @given(x=lists(integers(), min_size=1), data=data()) # Defer data selection for y till test is run. | |
225 | def test_check_filter_raises_ValueError_if_filter_is_invalid(x, data): | |
226 | y = [data.draw(integers(max_value=val)) for val in x] # ensure y is element-wise less than or equal to x | |
230 | 227 | with raises(ValueError) as err: |
231 | 228 | check_filter(list(zip(x, y))) |
232 | 229 | assert str(err.value) == 'Error in --filter: low >= high' |
236 | 233 | assert keep_entry_range('a56b23c89', [0], [100], int, re.compile(r'\d+')) |
237 | 234 | |
238 | 235 | |
239 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
240 | @given((py23_str, integers_in_range(1, 99), py23_str, integers_in_range(1, 99), py23_str)) | |
241 | def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_the_range_bounds(x): | |
242 | s = ''.join(map(py23_str, x)) | |
243 | assume(any(0 < int(i) < 100 for i in re.findall(r'\d+', s) if re.match(r'\d+$', i))) | |
244 | assert keep_entry_range(s, [0], [100], int, re.compile(r'\d+')) | |
245 | ||
246 | ||
247 | 236 | def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_any_range_bounds_example(): |
248 | 237 | assert keep_entry_range('a56b23c89', [1, 88], [20, 90], int, re.compile(r'\d+')) |
249 | 238 | |
250 | 239 | |
251 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
252 | @given((py23_str, integers_in_range(2, 89), py23_str, integers_in_range(2, 89), py23_str)) | |
253 | def test_keep_entry_range_returns_True_if_any_portion_of_input_is_between_any_range_bounds(x): | |
254 | s = ''.join(map(py23_str, x)) | |
255 | assume(any((1 < int(i) < 20) or (88 < int(i) < 90) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i))) | |
256 | assert keep_entry_range(s, [1, 88], [20, 90], int, re.compile(r'\d+')) | |
257 | ||
258 | ||
259 | 240 | def test_keep_entry_range_returns_False_if_no_portion_of_input_is_between_the_range_bounds_example(): |
260 | 241 | assert not keep_entry_range('a56b23c89', [1], [20], int, re.compile(r'\d+')) |
261 | 242 | |
262 | 243 | |
263 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
264 | @given((py23_str, integers_from(21), py23_str, integers_from(21), py23_str)) | |
265 | def test_keep_entry_range_returns_False_if_no_portion_of_input_is_between_the_range_bounds(x): | |
266 | s = ''.join(map(py23_str, x)) | |
267 | assume(all(not (1 <= int(i) <= 20) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i))) | |
268 | assert not keep_entry_range(s, [1], [20], int, re.compile(r'\d+')) | |
269 | ||
270 | ||
271 | 244 | def test_exclude_entry_returns_True_if_exlcude_parameters_are_not_in_input_example(): |
272 | 245 | assert exclude_entry('a56b23c89', [100, 45], int, re.compile(r'\d+')) |
273 | 246 | |
274 | 247 | |
275 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
276 | @given((py23_str, integers_from(0), py23_str, integers_from(0), py23_str)) | |
277 | def test_exclude_entry_returns_True_if_exlcude_parameters_are_not_in_input(x): | |
278 | s = ''.join(map(py23_str, x)) | |
279 | assume(not any(int(i) in (23, 45, 87) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i))) | |
280 | assert exclude_entry(s, [23, 45, 87], int, re.compile(r'\d+')) | |
281 | ||
282 | ||
283 | 248 | def test_exclude_entry_returns_False_if_exlcude_parameters_are_in_input_example(): |
284 | 249 | assert not exclude_entry('a56b23c89', [23], int, re.compile(r'\d+')) |
285 | ||
286 | ||
287 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
288 | @given((py23_str, sampled_from([23, 45, 87]), py23_str, sampled_from([23, 45, 87]), py23_str)) | |
289 | def test_exclude_entry_returns_False_if_exlcude_parameters_are_in_input(x): | |
290 | s = ''.join(map(py23_str, x)) | |
291 | assume(any(int(i) in (23, 45, 87) for i in re.findall(r'\d+', s) if re.match(r'\d+$', i))) | |
292 | assert not exclude_entry(s, [23, 45, 87], int, re.compile(r'\d+')) |
0 | # -*- coding: utf-8 -*- | |
1 | """\ | |
2 | Here are a collection of examples of how this module can be used. | |
3 | See the README or the natsort homepage for more details. | |
4 | """ | |
5 | from __future__ import unicode_literals, print_function | |
6 | import pytest | |
7 | import sys | |
8 | import warnings | |
9 | import locale | |
10 | from operator import itemgetter | |
11 | from pytest import raises | |
12 | from natsort import ( | |
13 | natsorted, | |
14 | index_natsorted, | |
15 | natsort_key, | |
16 | versorted, | |
17 | index_versorted, | |
18 | humansorted, | |
19 | index_humansorted, | |
20 | natsort_keygen, | |
21 | order_by_index, | |
22 | ns, | |
23 | realsorted, | |
24 | index_realsorted, | |
25 | decoder, | |
26 | as_ascii, | |
27 | as_utf8, | |
28 | ) | |
29 | from compat.locale import load_locale, has_locale_de_DE | |
30 | from natsort.utils import _natsort_key | |
31 | ||
32 | ||
33 | def test_decoder_returns_function_that_can_decode_bytes_but_return_non_bytes_as_is(): | |
34 | f = decoder('latin1') | |
35 | a = 'bytes' | |
36 | b = 14 | |
37 | assert f(b'bytes') == a | |
38 | assert f(b) is b # returns as-is, same object ID | |
39 | if sys.version[0] == '3': | |
40 | assert f(a) is a # same object returned on Python3 b/c only bytes has decode | |
41 | else: | |
42 | assert f(a) is not a | |
43 | assert f(a) == a # not same object on Python2 because str can decode | |
44 | ||
45 | ||
46 | def test_as_ascii_returns_bytes_as_ascii(): | |
47 | assert decoder('ascii')(b'bytes') == as_ascii(b'bytes') | |
48 | ||
49 | ||
50 | def test_as_utf8_returns_bytes_as_utf8(): | |
51 | assert decoder('utf8')(b'bytes') == as_utf8(b'bytes') | |
52 | ||
53 | ||
54 | def test_natsort_key_public_raises_DeprecationWarning_when_called(): | |
55 | # Identical to _natsort_key | |
56 | # But it raises a deprecation warning | |
57 | with warnings.catch_warnings(record=True) as w: | |
58 | warnings.simplefilter("always") | |
59 | assert natsort_key('a-5.034e2') == _natsort_key('a-5.034e2', key=None, alg=ns.I) | |
60 | assert len(w) == 1 | |
61 | assert "natsort_key is deprecated as of 3.4.0, please use natsort_keygen" in str(w[-1].message) | |
62 | # It is called for each element in a list when sorting | |
63 | with warnings.catch_warnings(record=True) as w: | |
64 | warnings.simplefilter("always") | |
65 | a = ['a2', 'a5', 'a9', 'a1', 'a4', 'a10', 'a6'] | |
66 | a.sort(key=natsort_key) | |
67 | assert len(w) == 7 | |
68 | ||
69 | ||
70 | def test_natsort_keygen_returns_natsort_key_with_alg_option(): | |
71 | a = 'a-5.034e1' | |
72 | assert natsort_keygen()(a) == _natsort_key(a, None, ns.I) | |
73 | assert natsort_keygen(alg=ns.F | ns.S)(a) == _natsort_key(a, None, ns.F | ns.S) | |
74 | ||
75 | ||
76 | def test_natsort_keygen_with_key_returns_same_result_as_nested_lambda_with_bare_natsort_key(): | |
77 | a = 'a-5.034e1' | |
78 | f1 = natsort_keygen(key=lambda x: x.upper()) | |
79 | ||
80 | def f2(x): | |
81 | return _natsort_key(x, lambda y: y.upper(), ns.I) | |
82 | assert f1(a) == f2(a) | |
83 | ||
84 | ||
85 | def test_natsort_keygen_returns_key_that_can_be_used_to_sort_list_in_place_with_same_result_as_natsorted(): | |
86 | a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] | |
87 | b = a[:] | |
88 | a.sort(key=natsort_keygen(alg=ns.F)) | |
89 | assert a == natsorted(b, alg=ns.F) | |
90 | ||
91 | ||
92 | def test_natsorted_returns_strings_with_numbers_in_ascending_order(): | |
93 | a = ['a2', 'a5', 'a9', 'a1', 'a4', 'a10', 'a6'] | |
94 | assert natsorted(a) == ['a1', 'a2', 'a4', 'a5', 'a6', 'a9', 'a10'] | |
95 | ||
96 | ||
97 | def test_natsorted_returns_list_of_numbers_sorted_as_signed_floats_with_exponents(): | |
98 | a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] | |
99 | assert natsorted(a, alg=ns.REAL) == ['a-50', 'a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.'] | |
100 | ||
101 | ||
102 | def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_floats_without_exponents_with_NOEXP_option(): | |
103 | a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] | |
104 | assert natsorted(a, alg=ns.N | ns.F | ns.U) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.', 'a-50'] | |
105 | # UNSIGNED is default | |
106 | assert natsorted(a, alg=ns.NOEXP | ns.FLOAT) == ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.', 'a-50'] | |
107 | ||
108 | ||
109 | def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_INT_option(): | |
110 | a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] | |
111 | assert natsorted(a, alg=ns.INT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50'] | |
112 | # INT is default | |
113 | assert natsorted(a) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50'] | |
114 | ||
115 | ||
116 | def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_DIGIT_and_VERSION_option(): | |
117 | a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] | |
118 | assert natsorted(a, alg=ns.DIGIT) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50'] | |
119 | assert natsorted(a, alg=ns.VERSION) == ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50'] | |
120 | ||
121 | ||
122 | def test_natsorted_returns_list_of_numbers_sorted_as_signed_ints_with_SIGNED_option(): | |
123 | a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] | |
124 | assert natsorted(a, alg=ns.SIGNED) == ['a-50', 'a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.'] | |
125 | ||
126 | ||
127 | def test_natsorted_returns_list_of_numbers_sorted_accounting_for_sign_with_SIGNED_option(): | |
128 | a = ['a-5', 'a7', 'a+2'] | |
129 | assert natsorted(a, alg=ns.SIGNED) == ['a-5', 'a+2', 'a7'] | |
130 | ||
131 | ||
132 | def test_natsorted_returns_list_of_numbers_sorted_not_accounting_for_sign_without_SIGNED_option(): | |
133 | a = ['a-5', 'a7', 'a+2'] | |
134 | assert natsorted(a) == ['a7', 'a+2', 'a-5'] | |
135 | ||
136 | ||
137 | def test_natsorted_returns_sorted_list_of_version_numbers_by_default_or_with_VERSION_option(): | |
138 | a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] | |
139 | assert natsorted(a) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4'] | |
140 | assert natsorted(a, alg=ns.VERSION) == ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4'] | |
141 | ||
142 | ||
143 | def test_natsorted_returns_sorted_list_with_mixed_type_input_and_does_not_raise_TypeError_on_Python3(): | |
144 | # You can mix types with natsorted. This can get around the new | |
145 | # 'unorderable types' issue with Python 3. | |
146 | a = [6, 4.5, '7', '2.5', 'a'] | |
147 | assert natsorted(a) == ['2.5', 4.5, 6, '7', 'a'] | |
148 | a = [46, '5a5b2', 'af5', '5a5-4'] | |
149 | assert natsorted(a) == ['5a5-4', '5a5b2', 46, 'af5'] | |
150 | ||
151 | ||
152 | def test_natsorted_with_mixed_input_returns_sorted_results_without_error(): | |
153 | a = ['2', 'ä', 'b', 1.5, 3] | |
154 | assert natsorted(a) == [1.5, '2', 3, 'b', 'ä'] | |
155 | ||
156 | ||
157 | def test_natsorted_with_nan_input_returns_sorted_results_with_nan_last_with_NANLAST(): | |
158 | a = ['25', 5, float('nan'), 1E40] | |
159 | # The slice is because NaN != NaN | |
160 | assert natsorted(a, alg=ns.NANLAST)[:3] == [5, '25', 1E40, float('nan')][:3] | |
161 | ||
162 | ||
163 | def test_natsorted_with_nan_input_returns_sorted_results_with_nan_first_without_NANLAST(): | |
164 | a = ['25', 5, float('nan'), 1E40] | |
165 | # The slice is because NaN != NaN | |
166 | assert natsorted(a)[1:] == [float('nan'), 5, '25', 1E40][1:] | |
167 | ||
168 | ||
169 | def test_natsorted_with_mixed_input_raises_TypeError_if_bytes_type_is_involved_on_Python3(): | |
170 | if sys.version[0] == '3': | |
171 | with raises(TypeError) as e: | |
172 | assert natsorted(['ä', b'b']) | |
173 | assert 'bytes' in str(e.value) | |
174 | else: | |
175 | assert True | |
176 | ||
177 | ||
178 | def test_natsorted_raises_ValueError_for_non_iterable_input(): | |
179 | with raises(TypeError) as err: | |
180 | natsorted(100) | |
181 | assert str(err.value) == "'int' object is not iterable" | |
182 | ||
183 | ||
184 | def test_natsorted_recursivley_applies_key_to_nested_lists_to_return_sorted_nested_list(): | |
185 | data = [['a1', 'a5'], ['a1', 'a40'], ['a10', 'a1'], ['a2', 'a5']] | |
186 | assert natsorted(data) == [['a1', 'a5'], ['a1', 'a40'], ['a2', 'a5'], ['a10', 'a1']] | |
187 | ||
188 | ||
189 | def test_natsorted_applies_key_to_each_list_element_before_sorting_list(): | |
190 | b = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')] | |
191 | assert natsorted(b, key=itemgetter(1)) == [('c', 'num2'), ('a', 'num3'), ('b', 'num5')] | |
192 | ||
193 | ||
194 | def test_natsorted_returns_list_in_reversed_order_with_reverse_option(): | |
195 | a = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300'] | |
196 | assert natsorted(a, reverse=True) == natsorted(a)[::-1] | |
197 | ||
198 | ||
199 | def test_natsorted_sorts_OS_generated_paths_incorrectly_without_PATH_option(): | |
200 | a = ['/p/Folder (10)/file.tar.gz', | |
201 | '/p/Folder/file.tar.gz', | |
202 | '/p/Folder (1)/file (1).tar.gz', | |
203 | '/p/Folder (1)/file.tar.gz'] | |
204 | assert natsorted(a) == ['/p/Folder (1)/file (1).tar.gz', | |
205 | '/p/Folder (1)/file.tar.gz', | |
206 | '/p/Folder (10)/file.tar.gz', | |
207 | '/p/Folder/file.tar.gz'] | |
208 | ||
209 | ||
210 | def test_natsorted_sorts_OS_generated_paths_correctly_with_PATH_option(): | |
211 | a = ['/p/Folder (10)/file.tar.gz', | |
212 | '/p/Folder/file.tar.gz', | |
213 | '/p/Folder (1)/file (1).tar.gz', | |
214 | '/p/Folder (1)/file.tar.gz'] | |
215 | assert natsorted(a, alg=ns.PATH) == ['/p/Folder/file.tar.gz', | |
216 | '/p/Folder (1)/file.tar.gz', | |
217 | '/p/Folder (1)/file (1).tar.gz', | |
218 | '/p/Folder (10)/file.tar.gz'] | |
219 | ||
220 | ||
221 | def test_natsorted_can_handle_sorting_paths_and_numbers_with_PATH(): | |
222 | # You can sort paths and numbers, not that you'd want to | |
223 | a = ['/Folder (9)/file.exe', 43] | |
224 | assert natsorted(a, alg=ns.PATH) == [43, '/Folder (9)/file.exe'] | |
225 | ||
226 | ||
227 | def test_natsorted_returns_results_in_ASCII_order_with_no_case_options(): | |
228 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
229 | assert natsorted(a) == ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn'] | |
230 | ||
231 | ||
232 | def test_natsorted_returns_results_sorted_by_lowercase_ASCII_order_with_IGNORECASE(): | |
233 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
234 | assert natsorted(a, alg=ns.IGNORECASE) == ['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn'] | |
235 | ||
236 | ||
237 | def test_natsorted_returns_results_in_ASCII_order_but_with_lowercase_letters_first_with_LOWERCASEFIRST(): | |
238 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
239 | assert natsorted(a, alg=ns.LOWERCASEFIRST) == ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn'] | |
240 | ||
241 | ||
242 | def test_natsorted_returns_results_with_uppercase_and_lowercase_letters_grouped_together_with_GROUPLETTERS(): | |
243 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
244 | assert natsorted(a, alg=ns.GROUPLETTERS) == ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn'] | |
245 | ||
246 | ||
247 | def test_natsorted_returns_results_in_natural_order_with_GROUPLETTERS_and_LOWERCASEFIRST(): | |
248 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
249 | assert natsorted(a, alg=ns.G | ns.LF) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] | |
250 | ||
251 | ||
252 | def test_natsorted_places_uppercase_letters_before_lowercase_letters_for_nested_input(): | |
253 | b = [('A5', 'a6'), ('a3', 'a1')] | |
254 | assert natsorted(b) == [('A5', 'a6'), ('a3', 'a1')] | |
255 | ||
256 | ||
257 | def test_natsorted_with_LOWERCASEFIRST_places_lowercase_letters_before_uppercase_letters_for_nested_input(): | |
258 | b = [('A5', 'a6'), ('a3', 'a1')] | |
259 | assert natsorted(b, alg=ns.LOWERCASEFIRST) == [('a3', 'a1'), ('A5', 'a6')] | |
260 | ||
261 | ||
262 | def test_natsorted_with_IGNORECASE_sorts_without_regard_to_case_for_nested_input(): | |
263 | b = [('A5', 'a6'), ('a3', 'a1')] | |
264 | assert natsorted(b, alg=ns.IGNORECASE) == [('a3', 'a1'), ('A5', 'a6')] | |
265 | ||
266 | ||
267 | def test_natsorted_with_LOCALE_returns_results_sorted_by_lowercase_first_and_grouped_letters(): | |
268 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
269 | load_locale('en_US') | |
270 | assert natsorted(a, alg=ns.LOCALE) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn'] | |
271 | locale.setlocale(locale.LC_ALL, str('')) | |
272 | ||
273 | ||
274 | def test_natsorted_with_LOCALE_and_CAPITALFIRST_returns_results_sorted_by_capital_first_and_ungrouped(): | |
275 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
276 | load_locale('en_US') | |
277 | assert natsorted(a, alg=ns.LOCALE | ns.CAPITALFIRST) == ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn'] | |
278 | locale.setlocale(locale.LC_ALL, str('')) | |
279 | ||
280 | ||
281 | def test_natsorted_with_LOCALE_and_LOWERCASEFIRST_returns_results_sorted_by_uppercase_first_and_grouped_letters(): | |
282 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
283 | load_locale('en_US') | |
284 | assert natsorted(a, alg=ns.LOCALE | ns.LOWERCASEFIRST) == ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn'] | |
285 | locale.setlocale(locale.LC_ALL, str('')) | |
286 | ||
287 | ||
288 | def test_natsorted_with_LOCALE_and_CAPITALFIRST_and_LOWERCASE_returns_results_sorted_by_capital_last_and_ungrouped(): | |
289 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
290 | load_locale('en_US') | |
291 | assert natsorted(a, alg=ns.LOCALE | ns.CAPITALFIRST | ns.LOWERCASEFIRST) == ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn'] | |
292 | locale.setlocale(locale.LC_ALL, str('')) | |
293 | ||
294 | ||
295 | def test_natsorted_with_LOCALE_and_en_setting_returns_results_sorted_by_en_language(): | |
296 | load_locale('en_US') | |
297 | a = ['c', 'ä', 'b', 'a5,6', 'a5,50'] | |
298 | assert natsorted(a, alg=ns.LOCALE | ns.F) == ['a5,6', 'a5,50', 'ä', 'b', 'c'] | |
299 | locale.setlocale(locale.LC_ALL, str('')) | |
300 | ||
301 | ||
302 | @pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale') | |
303 | def test_natsorted_with_LOCALE_and_de_setting_returns_results_sorted_by_de_language(): | |
304 | load_locale('de_DE') | |
305 | a = ['c', 'ä', 'b', 'a5,6', 'a5,50'] | |
306 | assert natsorted(a, alg=ns.LOCALE | ns.F) == ['a5,50', 'a5,6', 'ä', 'b', 'c'] | |
307 | locale.setlocale(locale.LC_ALL, str('')) | |
308 | ||
309 | ||
310 | def test_natsorted_with_LOCALE_and_mixed_input_returns_sorted_results_without_error(): | |
311 | load_locale('en_US') | |
312 | a = ['0', 'Á', '2', 'Z'] | |
313 | assert natsorted(a) == ['0', '2', 'Z', 'Á'] | |
314 | a = ['2', 'ä', 'b', 1.5, 3] | |
315 | assert natsorted(a, alg=ns.LOCALE) == [1.5, '2', 3, 'ä', 'b'] | |
316 | locale.setlocale(locale.LC_ALL, str('')) | |
317 | ||
318 | ||
319 | def test_versorted_returns_results_identical_to_natsorted(): | |
320 | a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] | |
321 | # versorted is retained for backwards compatibility | |
322 | assert versorted(a) == natsorted(a) | |
323 | ||
324 | ||
325 | def test_realsorted_returns_results_identical_to_natsorted_with_REAL(): | |
326 | a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] | |
327 | assert realsorted(a) == natsorted(a, alg=ns.REAL) | |
328 | ||
329 | ||
330 | def test_humansorted_returns_results_identical_to_natsorted_with_LOCALE(): | |
331 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
332 | assert humansorted(a) == natsorted(a, alg=ns.LOCALE) | |
333 | ||
334 | ||
335 | def test_index_natsorted_returns_integer_list_of_sort_order_for_input_list(): | |
336 | a = ['num3', 'num5', 'num2'] | |
337 | b = ['foo', 'bar', 'baz'] | |
338 | index = index_natsorted(a) | |
339 | assert index == [2, 0, 1] | |
340 | assert [a[i] for i in index] == ['num2', 'num3', 'num5'] | |
341 | assert [b[i] for i in index] == ['baz', 'foo', 'bar'] | |
342 | ||
343 | ||
344 | def test_index_natsorted_returns_reversed_integer_list_of_sort_order_for_input_list_with_reverse_option(): | |
345 | a = ['num3', 'num5', 'num2'] | |
346 | assert index_natsorted(a, reverse=True) == [1, 0, 2] | |
347 | ||
348 | ||
349 | def test_index_natsorted_applies_key_function_before_sorting(): | |
350 | c = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')] | |
351 | assert index_natsorted(c, key=itemgetter(1)) == [2, 0, 1] | |
352 | ||
353 | ||
354 | def test_index_natsorted_handles_unorderable_types_error_on_Python3(): | |
355 | a = [46, '5a5b2', 'af5', '5a5-4'] | |
356 | assert index_natsorted(a) == [3, 1, 0, 2] | |
357 | ||
358 | ||
359 | def test_index_natsorted_returns_integer_list_of_nested_input_list(): | |
360 | data = [['a1', 'a5'], ['a1', 'a40'], ['a10', 'a1'], ['a2', 'a5']] | |
361 | assert index_natsorted(data) == [0, 1, 3, 2] | |
362 | ||
363 | ||
364 | def test_index_natsorted_returns_integer_list_in_proper_order_for_input_paths_with_PATH(): | |
365 | a = ['/p/Folder (10)/', | |
366 | '/p/Folder/', | |
367 | '/p/Folder (1)/'] | |
368 | assert index_natsorted(a, alg=ns.PATH) == [1, 2, 0] | |
369 | ||
370 | ||
371 | def test_index_versorted_returns_results_identical_to_index_natsorted(): | |
372 | a = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1'] | |
373 | # index_versorted is retained for backwards compatibility | |
374 | assert index_versorted(a) == index_natsorted(a) | |
375 | ||
376 | ||
377 | def test_index_realsorted_returns_results_identical_to_index_natsorted_with_REAL(): | |
378 | a = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300'] | |
379 | assert index_realsorted(a) == index_natsorted(a, alg=ns.REAL) | |
380 | ||
381 | ||
382 | def test_index_humansorted_returns_results_identical_to_index_natsorted_with_LOCALE(): | |
383 | a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana'] | |
384 | assert index_humansorted(a) == index_natsorted(a, alg=ns.LOCALE) | |
385 | ||
386 | ||
387 | def test_order_by_index_sorts_list_according_to_order_of_integer_list(): | |
388 | a = ['num3', 'num5', 'num2'] | |
389 | index = [2, 0, 1] | |
390 | assert order_by_index(a, index) == ['num2', 'num3', 'num5'] | |
391 | assert order_by_index(a, index) == [a[i] for i in index] | |
392 | ||
393 | ||
394 | def test_order_by_index_returns_generator_with_iter_True(): | |
395 | a = ['num3', 'num5', 'num2'] | |
396 | index = [2, 0, 1] | |
397 | assert order_by_index(a, index, True) != [a[i] for i in index] | |
398 | assert list(order_by_index(a, index, True)) == [a[i] for i in index] |
0 | # -*- coding: utf-8 -*- | |
1 | # pylint: disable=unused-variable | |
2 | """These test the natcmp() function. | |
3 | ||
4 | Note that these tests are only relevant for Python version < 3. | |
5 | """ | |
6 | import sys | |
7 | from functools import partial | |
8 | from compat.mock import patch | |
9 | ||
10 | import pytest | |
11 | from hypothesis import given | |
12 | from hypothesis.strategies import floats, integers, lists | |
13 | ||
14 | from natsort import ns | |
15 | ||
16 | from natsort.compat.py23 import py23_cmp | |
17 | ||
18 | PY_VERSION = float(sys.version[:3]) | |
19 | ||
20 | if PY_VERSION < 3: | |
21 | from natsort import natcmp | |
22 | ||
23 | ||
24 | class Comparable(object): | |
25 | """Stub class for testing natcmp functionality.""" | |
26 | def __init__(self, value): | |
27 | self.value = value | |
28 | ||
29 | def __cmp__(self, other): | |
30 | return natcmp(self.value, other.value) | |
31 | ||
32 | ||
33 | @pytest.mark.skipif(PY_VERSION >= 3.0, reason='cmp() deprecated in Python 3') | |
34 | def test__classes_can_be_compared(): | |
35 | one = Comparable("1") | |
36 | two = Comparable("2") | |
37 | another_two = Comparable("2") | |
38 | ten = Comparable("10") | |
39 | ||
40 | assert ten > two == another_two > one | |
41 | ||
42 | ||
43 | @pytest.mark.skipif(PY_VERSION >= 3.0, reason='cmp() deprecated in Python 3') | |
44 | def test__keys_are_being_cached(): | |
45 | natcmp.cached_keys = {} | |
46 | assert len(natcmp.cached_keys) == 0 | |
47 | natcmp(0, 0) | |
48 | assert len(natcmp.cached_keys) == 1 | |
49 | natcmp(0, 0) | |
50 | assert len(natcmp.cached_keys) == 1 | |
51 | ||
52 | with patch('natsort.compat.locale.dumb_sort', return_value=False): | |
53 | natcmp(0, 0, alg=ns.L) | |
54 | assert len(natcmp.cached_keys) == 2 | |
55 | natcmp(0, 0, alg=ns.L) | |
56 | assert len(natcmp.cached_keys) == 2 | |
57 | ||
58 | with patch('natsort.compat.locale.dumb_sort', return_value=True): | |
59 | natcmp(0, 0, alg=ns.L) | |
60 | assert len(natcmp.cached_keys) == 3 | |
61 | natcmp(0, 0, alg=ns.L) | |
62 | assert len(natcmp.cached_keys) == 3 | |
63 | ||
64 | ||
65 | @pytest.mark.skipif(PY_VERSION >= 3.0, reason='cmp() deprecated in Python 3') | |
66 | def test__illegal_algorithm_raises_error(): | |
67 | try: | |
68 | natcmp(0, 0, alg="Just random stuff") | |
69 | assert False | |
70 | ||
71 | except ValueError: | |
72 | assert True | |
73 | ||
74 | except Exception: | |
75 | assert False | |
76 | ||
77 | ||
78 | @pytest.mark.skipif(PY_VERSION >= 3.0, reason='cmp() deprecated in Python 3') | |
79 | def test__classes_can_utilize_max_or_min(): | |
80 | comparables = [Comparable(i) for i in range(10)] | |
81 | ||
82 | assert max(comparables) == comparables[-1] | |
83 | assert min(comparables) == comparables[0] | |
84 | ||
85 | ||
86 | @pytest.mark.skipif(PY_VERSION >= 3.0, reason='cmp() deprecated in Python 3') | |
87 | @given(integers(), integers()) | |
88 | def test__natcmp_works_the_same_for_integers_as_cmp(x, y): | |
89 | assert py23_cmp(x, y) == natcmp(x, y) | |
90 | ||
91 | ||
@pytest.mark.skipif(PY_VERSION >= 3.0, reason='cmp() deprecated in Python 3')
@given(floats(allow_nan=False), floats(allow_nan=False))
def test__natcmp_works_the_same_for_floats_as_cmp(x, y):
    """For any pair of non-NaN floats, natcmp agrees with py23_cmp."""
    reference = py23_cmp(x, y)
    assert reference == natcmp(x, y)
96 | ||
97 | ||
@pytest.mark.skipif(PY_VERSION >= 3.0, reason='cmp() deprecated in Python 3')
@given(lists(elements=integers()))
def test_sort_strings_with_numbers(a_list):
    """Stringified integers sorted via natcmp (SIGNED) come out in numeric order."""
    signed_cmp = partial(natcmp, alg=ns.SIGNED)
    as_text = [str(number) for number in a_list]
    ordered_text = sorted(as_text, cmp=signed_cmp)

    # Converting back to int must reproduce the plain numeric sort.
    assert sorted(a_list) == [int(item) for item in ordered_text]
0 | # -*- coding: utf-8 -*- | |
1 | """These test the utils.py functions.""" | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import pytest | |
5 | from natsort.compat.py23 import PY_VERSION | |
6 | from natsort.ns_enum import ns | |
7 | from natsort.utils import ( | |
8 | _natsort_key, | |
9 | _regex_chooser, | |
10 | _parse_string_factory, | |
11 | _parse_path_factory, | |
12 | _parse_number_factory, | |
13 | _parse_bytes_factory, | |
14 | _input_string_transform_factory, | |
15 | _string_component_transform_factory, | |
16 | _final_data_transform_factory, | |
17 | ) | |
18 | from hypothesis import ( | |
19 | given, | |
20 | ) | |
21 | from hypothesis.strategies import ( | |
22 | lists, | |
23 | text, | |
24 | floats, | |
25 | integers, | |
26 | binary, | |
27 | ) | |
28 | ||
# Python 3 has no separate ``long`` type; alias it so tests can still name it.
if PY_VERSION >= 3:
    long = int


# ns.INT building blocks shared by the tests in this module.
regex = _regex_chooser[ns.INT]
pre = _input_string_transform_factory(ns.INT)
post = _string_component_transform_factory(ns.INT)
after = _final_data_transform_factory(ns.INT, '', '')
string_func = _parse_string_factory(
    ns.INT, '', regex.split, pre, post, after
)
bytes_func = _parse_bytes_factory(ns.INT)
num_func = _parse_number_factory(ns.INT, '', '')
40 | ||
41 | ||
def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple():
    """With PATH parsers, a number is wrapped in one extra tuple.

    The extra nesting lets numeric keys sort against the other as_path
    results.
    """
    path_string_func = _parse_path_factory(string_func)
    path_bytes_func = _parse_bytes_factory(ns.PATH)
    path_num_func = _parse_number_factory(ns.PATH, '', '')

    key = _natsort_key(10, None, path_string_func, path_bytes_func, path_num_func)
    assert key == (('', 10),)
49 | ||
50 | ||
@pytest.mark.skipif(PY_VERSION < 3, reason='only valid on python3')
def test__natsort_key_with_bytes_input_and_PATH_returns_number_in_nested_tuple():
    """With PATH parsers, bytes input comes back whole inside a nested tuple."""
    # The bytes value is not split into path components; it is only wrapped
    # in an extra tuple, as the expected key below shows.
    sfunc = _parse_path_factory(string_func)
    path_bytes_func = _parse_bytes_factory(ns.PATH)
    path_num_func = _parse_number_factory(ns.PATH, '', '')

    key = _natsort_key(b'/hello/world', None, sfunc, path_bytes_func, path_num_func)
    assert key == ((b'/hello/world',),)
59 | ||
60 | ||
def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple():
    """PATH handling recurses into a tuple of paths, keying each one."""
    path_string_func = _parse_path_factory(string_func)
    path_bytes_func = _parse_bytes_factory(ns.PATH)
    path_num_func = _parse_number_factory(ns.PATH, '', '')
    paths = ('/Folder', '/Folder (1)')

    key = _natsort_key(paths, None, path_string_func, path_bytes_func, path_num_func)
    assert key == (
        (('/',), ('Folder',)),
        (('/',), ('Folder (', 1, ')')),
    )
67 | ||
68 | ||
69 | # The remaining tests provide no examples, just hypothesis tests. | |
70 | # They only confirm that _natsort_key uses the above building blocks. | |
71 | ||
72 | ||
@given(floats(allow_nan=False) | integers())
def test__natsort_key_with_numeric_input_takes_number_path(x):
    """Numeric input yields exactly what the number parser returns."""
    key = _natsort_key(x, None, string_func, bytes_func, num_func)
    assert key == num_func(x)
76 | ||
77 | ||
@pytest.mark.skipif(PY_VERSION < 3, reason='only valid on python3')
@given(binary().filter(bool))
def test__natsort_key_with_bytes_input_takes_bytes_path(x):
    """Non-empty bytes input yields exactly what the bytes parser returns."""
    key = _natsort_key(x, None, string_func, bytes_func, num_func)
    assert key == bytes_func(x)
82 | ||
83 | ||
@given(lists(elements=floats(allow_nan=False) | text() | integers(), min_size=1, max_size=10))
def test__natsort_key_with_text_input_takes_string_path(x):
    """Text input yields exactly what the string parser returns."""
    # Build one string from the generated parts: numbers contribute their
    # repr, text is used verbatim.  isinstance is the idiomatic type test and
    # is equivalent here, since the strategy only yields float, int/long, text.
    s = ''.join(repr(y) if isinstance(y, (float, long, int)) else y for y in x)
    assert _natsort_key(s, None, string_func, bytes_func, num_func) == string_func(s)
88 | ||
89 | ||
@given(lists(elements=text(), min_size=1, max_size=10))
def test__natsort_key_with_nested_input_takes_nested_path(x):
    """A list input is keyed element by element into a tuple of string keys."""
    key = _natsort_key(x, None, string_func, bytes_func, num_func)
    assert key == tuple(string_func(item) for item in x)
93 | ||
94 | ||
@given(text())
def test__natsort_key_with_key_argument_applies_key_before_processing(x):
    """The key callable (here ``len``) runs first; its result is then parsed."""
    key = _natsort_key(x, len, string_func, bytes_func, num_func)
    assert key == num_func(len(x))
0 | # -*- coding: utf-8 -*- | |
1 | """\ | |
2 | Here is a collection of examples of how this module can be used. | |
3 | See the README or the natsort homepage for more details. | |
4 | """ | |
5 | from __future__ import unicode_literals, print_function | |
6 | ||
7 | import warnings | |
8 | import locale | |
9 | from pytest import raises | |
10 | from natsort import ( | |
11 | natsorted, | |
12 | natsort_key, | |
13 | natsort_keygen, | |
14 | ns, | |
15 | ) | |
16 | from natsort.compat.py23 import PY_VERSION | |
17 | from natsort.compat.locale import ( | |
18 | null_string_locale, | |
19 | get_strxfrm, | |
20 | ) | |
21 | from compat.mock import patch | |
22 | from compat.locale import load_locale | |
23 | ||
# Shared sample for the key-splitting tests: two strings and one float.
INPUT = ['6A-5.034e+1', '/Folder (1)/Foo', 56.7]
25 | ||
26 | ||
def test_natsort_key_public_raises_DeprecationWarning_when_called():
    """The public natsort_key emits its deprecation message on every call."""
    expected_text = "natsort_key is deprecated as of 3.4.0, please use natsort_keygen"

    # A single direct call still returns a key, and warns exactly once.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        assert natsort_key('a-5.034e2') == ('a-', 5, '.', 34, 'e', 2)
        assert len(caught) == 1
        assert expected_text in str(caught[-1].message)

    # Used as a sort key it is called, and warns, once per list element.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        names = ['a2', 'a5', 'a9', 'a1', 'a4', 'a10', 'a6']
        names.sort(key=natsort_key)
        assert len(caught) == 7
40 | ||
41 | ||
def test_natsort_keygen_with_invalid_alg_input_raises_ValueError():
    """A string passed as ``alg`` is rejected with a descriptive ValueError."""
    expected_message = "natsort_keygen: 'alg' argument must be from the enum 'ns', got 1"
    with raises(ValueError) as err:
        natsort_keygen(None, '1')
    assert str(err.value) == expected_message
47 | ||
48 | ||
def test_natsort_keygen_returns_natsort_key_that_parses_input():
    """The generated key splits a string according to the requested algorithm."""
    text = 'a-5.034e1'

    default_key = natsort_keygen()
    assert default_key(text) == ('a-', 5, '.', 34, 'e', 1)

    signed_float_key = natsort_keygen(alg=ns.F | ns.S)
    assert signed_float_key(text) == ('a', -50.34)
53 | ||
54 | ||
def test_natsort_keygen_returns_key_that_can_be_used_to_sort_list_in_place_with_same_result_as_natsorted():
    """list.sort with a generated key matches natsorted for the same alg."""
    original = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
    in_place = list(original)
    in_place.sort(key=natsort_keygen(alg=ns.F))
    assert in_place == natsorted(original, alg=ns.F)
60 | ||
61 | ||
def test_natsort_keygen_splits_input_with_defaults():
    """Default key: text is split on unsigned ints; bytes are kept whole."""
    expected = (
        ('', 6, 'A-', 5, '.', 34, 'e+', 1),
        ('/Folder (', 1, ')/Foo'),
        ('', 56.7),
    )
    assert natsort_keygen()(INPUT) == expected
    if PY_VERSION >= 3:
        assert natsort_keygen()(b'6A-5.034e+1') == (b'6A-5.034e+1',)
65 | ||
66 | ||
def test_natsort_keygen_splits_input_with_real():
    """ns.R key: text is split on signed floats; bytes are kept whole."""
    expected = (
        ('', 6.0, 'A', -50.34),
        ('/Folder (', 1.0, ')/Foo'),
        ('', 56.7),
    )
    assert natsort_keygen(alg=ns.R)(INPUT) == expected
    if PY_VERSION >= 3:
        assert natsort_keygen(alg=ns.R)(b'6A-5.034e+1') == (b'6A-5.034e+1',)
70 | ||
71 | ||
def test_natsort_keygen_splits_input_with_lowercasefirst_noexp_float():
    """LF | F | N key: case is swapped and floats are read without exponents."""
    alg = ns.LF | ns.F | ns.N
    expected = (
        ('', 6.0, 'a-', 5.034, 'E+', 1.0),
        ('/fOLDER (', 1.0, ')/fOO'),
        ('', 56.7),
    )
    assert natsort_keygen(alg=alg)(INPUT) == expected
    if PY_VERSION >= 3:
        assert natsort_keygen(alg=alg)(b'6A-5.034e+1') == (b'6A-5.034e+1',)
75 | ||
76 | ||
def test_natsort_keygen_splits_input_with_locale():
    """ns.L key: string parts go through strxfrm, for both dumb_sort answers."""
    load_locale('en_US')
    # Always run the trailing locale reset, even when an assertion fails, so
    # locale state changed by this test cannot leak into later tests.
    try:
        strxfrm = get_strxfrm()
        with patch('natsort.compat.locale.dumb_sort', return_value=False):
            assert natsort_keygen(alg=ns.L)(INPUT) == (
                (null_string_locale, 6, strxfrm('A-'), 5, strxfrm('.'), 34, strxfrm('e+'), 1),
                (strxfrm('/Folder ('), 1, strxfrm(')/Foo')),
                (null_string_locale, 56.7),
            )
        with patch('natsort.compat.locale.dumb_sort', return_value=True):
            assert natsort_keygen(alg=ns.L)(INPUT) == (
                (null_string_locale, 6, strxfrm('aa--'), 5, strxfrm('..'), 34, strxfrm('eE++'), 1),
                (strxfrm('//ffoOlLdDeErR (('), 1, strxfrm('))//ffoOoO')),
                (null_string_locale, 56.7),
            )
        if PY_VERSION >= 3:
            assert natsort_keygen(alg=ns.LA)(b'6A-5.034e+1') == (b'6A-5.034e+1',)
    finally:
        locale.setlocale(locale.LC_ALL, str(''))
86 | ||
87 | ||
def test_natsort_keygen_splits_input_with_locale_and_capitalfirst():
    """LA | C key: each strxfrm'd key is paired with a leading 1-tuple."""
    load_locale('en_US')
    # Always run the trailing locale reset, even when an assertion fails, so
    # locale state changed by this test cannot leak into later tests.
    try:
        strxfrm = get_strxfrm()
        with patch('natsort.compat.locale.dumb_sort', return_value=False):
            assert natsort_keygen(alg=ns.LA | ns.C)(INPUT) == (
                (('',), (null_string_locale, 6, strxfrm('A-'), 5, strxfrm('.'), 34, strxfrm('e+'), 1)),
                (('/',), (strxfrm('/Folder ('), 1, strxfrm(')/Foo'))),
                (('',), (null_string_locale, 56.7)),
            )
        if PY_VERSION >= 3:
            assert natsort_keygen(alg=ns.LA | ns.C)(b'6A-5.034e+1') == (b'6A-5.034e+1',)
    finally:
        locale.setlocale(locale.LC_ALL, str(''))
95 | ||
96 | ||
def test_natsort_keygen_splits_input_with_path():
    """P | G key: input is split into path parts with grouped letters."""
    alg = ns.P | ns.G
    expected = (
        (('', 6, 'aA--', 5, '..', 34, 'ee++', 1),),
        (('//',), ('fFoollddeerr ((', 1, '))'), ('fFoooo',)),
        (('', 56.7),),
    )
    assert natsort_keygen(alg=alg)(INPUT) == expected
    if PY_VERSION >= 3:
        assert natsort_keygen(alg=alg)(b'6A-5.034e+1') == ((b'6A-5.034e+1',),)
100 | ||
101 | ||
def test_natsort_keygen_splits_input_with_ignorecase():
    """ns.IC key: text and bytes are both lowercased."""
    expected = (
        ('', 6, 'a-', 5, '.', 34, 'e+', 1),
        ('/folder (', 1, ')/foo'),
        ('', 56.7),
    )
    assert natsort_keygen(alg=ns.IC)(INPUT) == expected
    if PY_VERSION >= 3:
        assert natsort_keygen(alg=ns.IC)(b'6A-5.034e+1') == (b'6a-5.034e+1',)
0 | # -*- coding: utf-8 -*- | |
1 | """\ | |
2 | Here is a collection of examples of how this module can be used. | |
3 | See the README or the natsort homepage for more details. | |
4 | """ | |
5 | from __future__ import unicode_literals, print_function | |
6 | import pytest | |
7 | import locale | |
8 | from natsort.compat.py23 import PY_VERSION | |
9 | from operator import itemgetter | |
10 | from pytest import raises | |
11 | from natsort import ( | |
12 | natsorted, | |
13 | ns, | |
14 | ) | |
15 | from compat.locale import ( | |
16 | load_locale, | |
17 | has_locale_de_DE, | |
18 | ) | |
19 | ||
20 | ||
def test_natsorted_returns_strings_with_numbers_in_ascending_order():
    """Embedded numbers are ordered by value, so 'a10' follows 'a9'."""
    names = ['a2', 'a5', 'a9', 'a1', 'a4', 'a10', 'a6']
    expected = ['a1', 'a2', 'a4', 'a5', 'a6', 'a9', 'a10']
    assert natsorted(names) == expected
24 | ||
25 | ||
def test_natsorted_returns_list_of_numbers_sorted_as_signed_floats_with_exponents():
    """ns.REAL reads signed floats, including exponent notation."""
    values = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
    expected = ['a-50', 'a50', 'a50.300', 'a50.31', 'a5.034e1', 'a50.4', 'a51.']
    assert natsorted(values, alg=ns.REAL) == expected
29 | ||
30 | ||
def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_floats_without_exponents_with_NOEXP_option():
    """NOEXP | FLOAT reads unsigned floats and ignores exponent notation."""
    values = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
    expected = ['a5.034e1', 'a50', 'a50.300', 'a50.31', 'a50.4', 'a51.', 'a-50']

    assert natsorted(values, alg=ns.N | ns.F | ns.U) == expected
    # UNSIGNED is default, so leaving it out gives the same order.
    assert natsorted(values, alg=ns.NOEXP | ns.FLOAT) == expected
36 | ||
37 | ||
def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_INT_option():
    """ns.INT reads unsigned integers, and is what natsorted uses by default."""
    values = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
    expected = ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50']

    assert natsorted(values, alg=ns.INT) == expected
    # INT is default, so passing no alg gives the same order.
    assert natsorted(values) == expected
43 | ||
44 | ||
def test_natsorted_returns_list_of_numbers_sorted_as_unsigned_ints_with_DIGIT_and_VERSION_option():
    """ns.DIGIT and ns.VERSION both give the unsigned-integer ordering."""
    values = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
    expected = ['a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.', 'a-50']
    for option in (ns.DIGIT, ns.VERSION):
        assert natsorted(values, alg=option) == expected
49 | ||
50 | ||
def test_natsorted_returns_list_of_numbers_sorted_as_signed_ints_with_SIGNED_option():
    """ns.SIGNED treats a leading '-' as part of the integer."""
    values = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
    expected = ['a-50', 'a5.034e1', 'a50', 'a50.4', 'a50.31', 'a50.300', 'a51.']
    assert natsorted(values, alg=ns.SIGNED) == expected
54 | ||
55 | ||
def test_natsorted_returns_list_of_numbers_sorted_accounting_for_sign_with_SIGNED_option():
    """With ns.SIGNED, '-5' sorts before '+2', which sorts before '7'."""
    values = ['a-5', 'a7', 'a+2']
    expected = ['a-5', 'a+2', 'a7']
    assert natsorted(values, alg=ns.SIGNED) == expected
59 | ||
60 | ||
def test_natsorted_returns_list_of_numbers_sorted_not_accounting_for_sign_without_SIGNED_option():
    """Without ns.SIGNED, '+' and '-' are ordinary characters, not signs."""
    values = ['a-5', 'a7', 'a+2']
    expected = ['a7', 'a+2', 'a-5']
    assert natsorted(values) == expected
64 | ||
65 | ||
def test_natsorted_returns_sorted_list_of_version_numbers_by_default_or_with_VERSION_option():
    """Version strings sort the same by default and with ns.VERSION."""
    versions = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
    expected = ['1.9.9a', '1.9.9b', '1.10.1', '1.11', '1.11.4']

    assert natsorted(versions) == expected
    assert natsorted(versions, alg=ns.VERSION) == expected
70 | ||
71 | ||
def test_natsorted_returns_sorted_list_with_mixed_type_input_and_does_not_raise_TypeError_on_Python3():
    """Numbers and strings can be sorted together.

    This gets around the 'unorderable types' error Python 3 raises when
    comparing such values directly.
    """
    numbers_and_text = [6, 4.5, '7', '2.5', 'a']
    assert natsorted(numbers_and_text) == ['2.5', 4.5, 6, '7', 'a']

    int_and_text = [46, '5a5b2', 'af5', '5a5-4']
    assert natsorted(int_and_text) == ['5a5-4', '5a5b2', 46, 'af5']
79 | ||
80 | ||
def test_natsorted_with_mixed_input_returns_sorted_results_without_error():
    """Numbers sort before text by default and after it with ns.NUMAFTER."""
    digits_and_letters = ['0', 'Á', '2', 'Z']
    assert natsorted(digits_and_letters) == ['0', '2', 'Á', 'Z']
    assert natsorted(digits_and_letters, alg=ns.NUMAFTER) == ['Á', 'Z', '0', '2']

    text_and_numbers = ['2', 'ä', 'b', 1.5, 3]
    assert natsorted(text_and_numbers) == [1.5, '2', 3, 'ä', 'b']
    assert natsorted(text_and_numbers, alg=ns.NUMAFTER) == ['ä', 'b', 1.5, '2', 3]
88 | ||
89 | ||
def test_natsorted_with_nan_input_returns_sorted_results_with_nan_last_with_NANLAST():
    """With ns.NANLAST the three non-NaN values occupy the first three slots."""
    values = ['25', 5, float('nan'), 1E40]
    result = natsorted(values, alg=ns.NANLAST)
    # Only the leading three items are compared, because NaN != NaN.
    assert result[:3] == [5, '25', 1E40]
94 | ||
95 | ||
def test_natsorted_with_nan_input_returns_sorted_results_with_nan_first_without_NANLAST():
    """By default the three non-NaN values occupy the last three slots."""
    values = ['25', 5, float('nan'), 1E40]
    result = natsorted(values)
    # The first item is skipped in the comparison, because NaN != NaN.
    assert result[1:] == [5, '25', 1E40]
100 | ||
101 | ||
@pytest.mark.skipif(PY_VERSION < 3, reason='only valid on python3')
def test_natsorted_with_mixed_input_raises_TypeError_if_bytes_type_is_involved_on_Python3():
    """Sorting text together with bytes raises a TypeError mentioning bytes."""
    # Skipping on Python 2 reports the test as not applicable there, instead
    # of recording a pass for a branch that only ran ``assert True``.
    with raises(TypeError) as e:
        natsorted(['ä', b'b'])
    assert 'bytes' in str(e.value)
109 | ||
110 | ||
def test_natsorted_raises_ValueError_for_non_iterable_input():
    """Non-iterable input is rejected.

    Despite this test's name, the exception asserted here is a TypeError.
    """
    expected_message = "'int' object is not iterable"
    with raises(TypeError) as err:
        natsorted(100)
    assert str(err.value) == expected_message
115 | ||
116 | ||
def test_natsorted_recursivley_applies_key_to_nested_lists_to_return_sorted_nested_list():
    """Lists of lists are ordered by the natural order of their elements."""
    nested = [['a1', 'a5'], ['a1', 'a40'], ['a10', 'a1'], ['a2', 'a5']]
    expected = [['a1', 'a5'], ['a1', 'a40'], ['a2', 'a5'], ['a10', 'a1']]
    assert natsorted(nested) == expected
120 | ||
121 | ||
def test_natsorted_applies_key_to_each_list_element_before_sorting_list():
    """The ``key`` callable selects what each element is sorted by."""
    pairs = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')]
    by_second_item = itemgetter(1)
    expected = [('c', 'num2'), ('a', 'num3'), ('b', 'num5')]
    assert natsorted(pairs, key=by_second_item) == expected
125 | ||
126 | ||
def test_natsorted_returns_list_in_reversed_order_with_reverse_option():
    """reverse=True gives the forward result back to front."""
    values = ['a50', 'a51.', 'a50.31', 'a50.4', 'a5.034e1', 'a50.300']
    backwards = natsorted(values, reverse=True)
    assert backwards == natsorted(values)[::-1]
130 | ||
131 | ||
def test_natsorted_sorts_OS_generated_paths_incorrectly_without_PATH_option():
    """Without ns.PATH, the plain 'Folder' entry ends up after 'Folder (10)'."""
    paths = [
        '/p/Folder (10)/file.tar.gz',
        '/p/Folder/file.tar.gz',
        '/p/Folder (1)/file (1).tar.gz',
        '/p/Folder (1)/file.tar.gz',
    ]
    expected = [
        '/p/Folder (1)/file (1).tar.gz',
        '/p/Folder (1)/file.tar.gz',
        '/p/Folder (10)/file.tar.gz',
        '/p/Folder/file.tar.gz',
    ]
    assert natsorted(paths) == expected
141 | ||
142 | ||
def test_natsorted_sorts_OS_generated_paths_correctly_with_PATH_option():
    """With ns.PATH, the plain 'Folder' entry comes before the numbered ones."""
    paths = [
        '/p/Folder (10)/file.tar.gz',
        '/p/Folder/file.tar.gz',
        '/p/Folder (1)/file (1).tar.gz',
        '/p/Folder (1)/file.tar.gz',
    ]
    expected = [
        '/p/Folder/file.tar.gz',
        '/p/Folder (1)/file.tar.gz',
        '/p/Folder (1)/file (1).tar.gz',
        '/p/Folder (10)/file.tar.gz',
    ]
    assert natsorted(paths, alg=ns.PATH) == expected
152 | ||
153 | ||
def test_natsorted_can_handle_sorting_paths_and_numbers_with_PATH():
    """ns.PATH copes with a number mixed in among paths (unusual as that is)."""
    path_and_number = ['/Folder (9)/file.exe', 43]
    expected = [43, '/Folder (9)/file.exe']
    assert natsorted(path_and_number, alg=ns.PATH) == expected
158 | ||
159 | ||
def test_natsorted_returns_results_in_ASCII_order_with_no_case_options():
    """By default every capitalised word sorts before every lowercase word."""
    words = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    expected = ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn']
    assert natsorted(words) == expected
163 | ||
164 | ||
def test_natsorted_returns_results_sorted_by_lowercase_ASCII_order_with_IGNORECASE():
    """ns.IGNORECASE compares words without regard to case."""
    words = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    expected = ['Apple', 'apple', 'Banana', 'banana', 'corn', 'Corn']
    assert natsorted(words, alg=ns.IGNORECASE) == expected
168 | ||
169 | ||
def test_natsorted_returns_results_in_ASCII_order_but_with_lowercase_letters_first_with_LOWERCASEFIRST():
    """ns.LOWERCASEFIRST puts every lowercase word before the capitalised ones."""
    words = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    expected = ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn']
    assert natsorted(words, alg=ns.LOWERCASEFIRST) == expected
173 | ||
174 | ||
def test_natsorted_returns_results_with_uppercase_and_lowercase_letters_grouped_together_with_GROUPLETTERS():
    """ns.GROUPLETTERS keeps each word beside its other-case twin, capital first."""
    words = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    expected = ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn']
    assert natsorted(words, alg=ns.GROUPLETTERS) == expected
178 | ||
179 | ||
def test_natsorted_returns_results_in_natural_order_with_GROUPLETTERS_and_LOWERCASEFIRST():
    """G | LF keeps case twins together with the lowercase word first."""
    words = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    expected = ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn']
    assert natsorted(words, alg=ns.G | ns.LF) == expected
183 | ||
184 | ||
def test_natsorted_places_uppercase_letters_before_lowercase_letters_for_nested_input():
    """For tuples, the default order also puts uppercase before lowercase."""
    pairs = [('A5', 'a6'), ('a3', 'a1')]
    expected = [('A5', 'a6'), ('a3', 'a1')]
    assert natsorted(pairs) == expected
188 | ||
189 | ||
def test_natsorted_with_LOWERCASEFIRST_places_lowercase_letters_before_uppercase_letters_for_nested_input():
    """For tuples, ns.LOWERCASEFIRST puts lowercase before uppercase."""
    pairs = [('A5', 'a6'), ('a3', 'a1')]
    expected = [('a3', 'a1'), ('A5', 'a6')]
    assert natsorted(pairs, alg=ns.LOWERCASEFIRST) == expected
193 | ||
194 | ||
def test_natsorted_with_IGNORECASE_sorts_without_regard_to_case_for_nested_input():
    """For tuples, ns.IGNORECASE orders purely by the embedded numbers here."""
    pairs = [('A5', 'a6'), ('a3', 'a1')]
    expected = [('a3', 'a1'), ('A5', 'a6')]
    assert natsorted(pairs, alg=ns.IGNORECASE) == expected
198 | ||
199 | ||
def test_natsorted_with_LOCALE_returns_results_sorted_by_lowercase_first_and_grouped_letters():
    """With en_US loaded, ns.LOCALE groups case twins with lowercase first."""
    a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    load_locale('en_US')
    # Always run the trailing locale reset, even when the assertion fails, so
    # locale state changed by this test cannot leak into later tests.
    try:
        assert natsorted(a, alg=ns.LOCALE) == ['apple', 'Apple', 'banana', 'Banana', 'corn', 'Corn']
    finally:
        locale.setlocale(locale.LC_ALL, str(''))
205 | ||
206 | ||
def test_natsorted_with_LOCALE_and_CAPITALFIRST_returns_results_sorted_by_capital_first_and_ungrouped():
    """LOCALE | CAPITALFIRST puts all capitalised words before lowercase ones."""
    a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    load_locale('en_US')
    # Always run the trailing locale reset, even when the assertion fails, so
    # locale state changed by this test cannot leak into later tests.
    try:
        assert natsorted(a, alg=ns.LOCALE | ns.CAPITALFIRST) == ['Apple', 'Banana', 'Corn', 'apple', 'banana', 'corn']
    finally:
        locale.setlocale(locale.LC_ALL, str(''))
212 | ||
213 | ||
def test_natsorted_with_LOCALE_and_LOWERCASEFIRST_returns_results_sorted_by_uppercase_first_and_grouped_letters():
    """LOCALE | LOWERCASEFIRST groups case twins with the capitalised word first."""
    a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    load_locale('en_US')
    # Always run the trailing locale reset, even when the assertion fails, so
    # locale state changed by this test cannot leak into later tests.
    try:
        assert natsorted(a, alg=ns.LOCALE | ns.LOWERCASEFIRST) == ['Apple', 'apple', 'Banana', 'banana', 'Corn', 'corn']
    finally:
        locale.setlocale(locale.LC_ALL, str(''))
219 | ||
220 | ||
def test_natsorted_with_LOCALE_and_CAPITALFIRST_and_LOWERCASE_returns_results_sorted_by_capital_last_and_ungrouped():
    """LOCALE | CAPITALFIRST | LOWERCASEFIRST puts lowercase words first."""
    a = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    load_locale('en_US')
    # Always run the trailing locale reset, even when the assertion fails, so
    # locale state changed by this test cannot leak into later tests.
    try:
        assert natsorted(a, alg=ns.LOCALE | ns.CAPITALFIRST | ns.LOWERCASEFIRST) == ['apple', 'banana', 'corn', 'Apple', 'Banana', 'Corn']
    finally:
        locale.setlocale(locale.LC_ALL, str(''))
226 | ||
227 | ||
def test_natsorted_with_LOCALE_and_en_setting_returns_results_sorted_by_en_language():
    """LOCALE | F with en_US loaded sorts comma-grouped numbers by value."""
    load_locale('en_US')
    # Always run the trailing locale reset, even when the assertion fails, so
    # locale state changed by this test cannot leak into later tests.
    try:
        a = ['c', 'a5,467.86', 'ä', 'b', 'a5367.86', 'a5,6', 'a5,50']
        assert natsorted(a, alg=ns.LOCALE | ns.F) == ['a5,6', 'a5,50', 'a5367.86', 'a5,467.86', 'ä', 'b', 'c']
    finally:
        locale.setlocale(locale.LC_ALL, str(''))
233 | ||
234 | ||
@pytest.mark.skipif(not has_locale_de_DE, reason='requires de_DE locale and working locale')
def test_natsorted_with_LOCALE_and_de_setting_returns_results_sorted_by_de_language():
    """LOCALE | F with de_DE loaded sorts the German-formatted numbers by value."""
    load_locale('de_DE')
    # Always run the trailing locale reset, even when the assertion fails, so
    # locale state changed by this test cannot leak into later tests.
    try:
        a = ['c', 'a5.467,86', 'ä', 'b', 'a5367.86', 'a5,6', 'a5,50']
        assert natsorted(a, alg=ns.LOCALE | ns.F) == ['a5,50', 'a5,6', 'a5367.86', 'a5.467,86', 'ä', 'b', 'c']
    finally:
        locale.setlocale(locale.LC_ALL, str(''))
241 | ||
242 | ||
def test_natsorted_with_LOCALE_and_mixed_input_returns_sorted_results_without_error():
    """ns.LOCALE sorts mixed digits, letters and numbers, with or without NUMAFTER."""
    load_locale('en_US')
    # Always run the trailing locale reset, even when an assertion fails, so
    # locale state changed by this test cannot leak into later tests.
    try:
        a = ['0', 'Á', '2', 'Z']
        assert natsorted(a, alg=ns.LOCALE) == ['0', '2', 'Á', 'Z']
        assert natsorted(a, alg=ns.LOCALE | ns.NUMAFTER) == ['Á', 'Z', '0', '2']
        a = ['2', 'ä', 'b', 1.5, 3]
        assert natsorted(a, alg=ns.LOCALE) == [1.5, '2', 3, 'ä', 'b']
        assert natsorted(a, alg=ns.LOCALE | ns.NUMAFTER) == ['ä', 'b', 1.5, '2', 3]
    finally:
        locale.setlocale(locale.LC_ALL, str(''))
252 | ||
253 | ||
def test_natsorted_with_LOCALE_and_UNGROUPLETTERS_and_mixed_input_returns_sorted_results_without_error():
    """LOCALE | UNGROUPLETTERS sorts mixed input, with or without NUMAFTER."""
    load_locale('en_US')
    # Always run the trailing locale reset, even when an assertion fails, so
    # locale state changed by this test cannot leak into later tests.
    try:
        a = ['0', 'Á', '2', 'Z']
        assert natsorted(a, alg=ns.LOCALE | ns.UNGROUPLETTERS) == ['0', '2', 'Á', 'Z']
        assert natsorted(a, alg=ns.LOCALE | ns.UNGROUPLETTERS | ns.NUMAFTER) == ['Á', 'Z', '0', '2']
        a = ['2', 'ä', 'b', 1.5, 3]
        assert natsorted(a, alg=ns.LOCALE | ns.UNGROUPLETTERS) == [1.5, '2', 3, 'ä', 'b']
        assert natsorted(a, alg=ns.LOCALE | ns.UNGROUPLETTERS | ns.NUMAFTER) == ['ä', 'b', 1.5, '2', 3]
    finally:
        locale.setlocale(locale.LC_ALL, str(''))
263 | ||
264 | ||
def test_natsorted_with_PATH_and_LOCALE_and_UNGROUPLETTERS_and_mixed_input_returns_sorted_results_without_error():
    """PATH | LOCALE | UNGROUPLETTERS sorts mixed input, with or without NUMAFTER."""
    load_locale('en_US')
    # Always run the trailing locale reset, even when an assertion fails, so
    # locale state changed by this test cannot leak into later tests.
    try:
        a = ['0', 'Á', '2', 'Z']
        assert natsorted(a, alg=ns.PATH | ns.LOCALE | ns.UNGROUPLETTERS) == ['0', '2', 'Á', 'Z']
        assert natsorted(a, alg=ns.PATH | ns.LOCALE | ns.UNGROUPLETTERS | ns.NUMAFTER) == ['Á', 'Z', '0', '2']
        a = ['2', 'ä', 'b', 1.5, 3]
        assert natsorted(a, alg=ns.PATH | ns.LOCALE | ns.UNGROUPLETTERS) == [1.5, '2', 3, 'ä', 'b']
        assert natsorted(a, alg=ns.PATH | ns.LOCALE | ns.UNGROUPLETTERS | ns.NUMAFTER) == ['ä', 'b', 1.5, '2', 3]
    finally:
        locale.setlocale(locale.LC_ALL, str(''))
274 | ||
275 | ||
def test_natsorted_sorts_an_odd_collection_of_string():
    """Pure-number strings sort first by default and last with ns.NUMAFTER."""
    items = ['Corn', 'apple', 'Banana', '73', 'Apple', '5039', 'corn', '~~~~~~', 'banana']
    numbers_first = [
        '73', '5039', 'Apple', 'Banana', 'Corn',
        'apple', 'banana', 'corn', '~~~~~~',
    ]
    numbers_last = [
        'Apple', 'Banana', 'Corn',
        'apple', 'banana', 'corn', '~~~~~~', '73', '5039',
    ]
    assert natsorted(items) == numbers_first
    assert natsorted(items, alg=ns.NUMAFTER) == numbers_last
282 | ||
283 | ||
def test_natsorted_sorts_mixed_ascii_and_non_ascii_numbers():
    """ASCII and non-ASCII digits are interleaved by numeric value."""
    streets = ['1st street', '10th street', '2nd street', '2 street', '1 street', '1street',
               '11 street', 'street 2', 'street 1', 'Street 11', '۲ street', '۱ street', '۱street',
               '۱۲street', '۱۱ street', 'street ۲', 'street ۱', 'street ۱', 'street ۱۲', 'street ۱۱']
    expected = ['1 street', '۱ street', '1st street', '1street', '۱street', '2 street', '۲ street',
                '2nd street', '10th street', '11 street', '۱۱ street', '۱۲street', 'street 1',
                'street ۱', 'street ۱', 'street 2', 'street ۲', 'Street 11', 'street ۱۱', 'street ۱۲']
    result = natsorted(streets, alg=ns.IGNORECASE)
    assert result == expected
0 | # -*- coding: utf-8 -*- | |
1 | """\ | |
2 | Here is a collection of examples of how this module can be used. | |
3 | See the README or the natsort homepage for more details. | |
4 | """ | |
5 | from __future__ import unicode_literals, print_function | |
6 | from operator import itemgetter | |
7 | from natsort.compat.py23 import PY_VERSION | |
8 | from natsort import ( | |
9 | natsorted, | |
10 | index_natsorted, | |
11 | versorted, | |
12 | index_versorted, | |
13 | humansorted, | |
14 | index_humansorted, | |
15 | realsorted, | |
16 | index_realsorted, | |
17 | order_by_index, | |
18 | ns, | |
19 | decoder, | |
20 | as_ascii, | |
21 | as_utf8, | |
22 | ) | |
23 | ||
24 | ||
def test_decoder_returns_function_that_can_decode_bytes_but_return_non_bytes_as_is():
    """decoder() decodes bytes and hands a non-decodable object back untouched."""
    decode = decoder('latin1')
    text = 'bytes'
    number = 14

    assert decode(b'bytes') == text
    assert decode(number) is number  # returns as-is, same object ID
    if PY_VERSION < 3:
        # not same object on Python2 because str can decode
        assert decode(text) is not text
        assert decode(text) == text
    else:
        # same object returned on Python3 b/c only bytes has decode
        assert decode(text) is text
36 | ||
37 | ||
def test_as_ascii_returns_bytes_as_ascii():
    """as_ascii gives the same result as decoder('ascii')."""
    raw = b'bytes'
    assert decoder('ascii')(raw) == as_ascii(raw)
40 | ||
41 | ||
def test_as_utf8_returns_bytes_as_utf8():
    """as_utf8 gives the same result as decoder('utf8')."""
    raw = b'bytes'
    assert decoder('utf8')(raw) == as_utf8(raw)
44 | ||
45 | ||
def test_versorted_returns_results_identical_to_natsorted():
    """versorted (retained for backwards compatibility) matches natsorted."""
    versions = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
    assert versorted(versions) == natsorted(versions)
50 | ||
51 | ||
def test_realsorted_returns_results_identical_to_natsorted_with_REAL():
    """realsorted matches natsorted called with alg=ns.REAL."""
    values = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
    assert realsorted(values) == natsorted(values, alg=ns.REAL)
55 | ||
56 | ||
def test_humansorted_returns_results_identical_to_natsorted_with_LOCALE():
    """humansorted matches natsorted called with alg=ns.LOCALE."""
    words = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    assert humansorted(words) == natsorted(words, alg=ns.LOCALE)
60 | ||
61 | ||
def test_index_natsorted_returns_integer_list_of_sort_order_for_input_list():
    """index_natsorted returns indexes that put any parallel list in order."""
    names = ['num3', 'num5', 'num2']
    companions = ['foo', 'bar', 'baz']

    order = index_natsorted(names)
    assert order == [2, 0, 1]
    # The same indexes reorder the source list and an unrelated parallel list.
    assert [names[i] for i in order] == ['num2', 'num3', 'num5']
    assert [companions[i] for i in order] == ['baz', 'foo', 'bar']
69 | ||
70 | ||
def test_index_natsorted_returns_reversed_integer_list_of_sort_order_for_input_list_with_reverse_option():
    """reverse=True yields the indexes for descending order."""
    names = ['num3', 'num5', 'num2']
    order = index_natsorted(names, reverse=True)
    assert order == [1, 0, 2]
74 | ||
75 | ||
def test_index_natsorted_applies_key_function_before_sorting():
    """The ``key`` callable selects what each element is ordered by."""
    pairs = [('a', 'num3'), ('b', 'num5'), ('c', 'num2')]
    by_second_item = itemgetter(1)
    assert index_natsorted(pairs, key=by_second_item) == [2, 0, 1]
79 | ||
80 | ||
def test_index_natsorted_handles_unorderable_types_error_on_Python3():
    """An int mixed in with strings is indexed without raising."""
    mixed = [46, '5a5b2', 'af5', '5a5-4']
    order = index_natsorted(mixed)
    assert order == [3, 1, 0, 2]
84 | ||
85 | ||
def test_index_natsorted_returns_integer_list_of_nested_input_list():
    """Nested lists are indexed by the natural order of their elements."""
    nested = [['a1', 'a5'], ['a1', 'a40'], ['a10', 'a1'], ['a2', 'a5']]
    order = index_natsorted(nested)
    assert order == [0, 1, 3, 2]
89 | ||
90 | ||
def test_index_natsorted_returns_integer_list_in_proper_order_for_input_paths_with_PATH():
    """With ns.PATH the plain 'Folder' path is indexed before the numbered ones."""
    paths = [
        '/p/Folder (10)/',
        '/p/Folder/',
        '/p/Folder (1)/',
    ]
    order = index_natsorted(paths, alg=ns.PATH)
    assert order == [1, 2, 0]
96 | ||
97 | ||
def test_index_versorted_returns_results_identical_to_index_natsorted():
    """index_versorted (retained for backwards compatibility) matches index_natsorted."""
    versions = ['1.9.9a', '1.11', '1.9.9b', '1.11.4', '1.10.1']
    assert index_versorted(versions) == index_natsorted(versions)
102 | ||
103 | ||
def test_index_realsorted_returns_results_identical_to_index_natsorted_with_REAL():
    """index_realsorted matches index_natsorted called with alg=ns.REAL."""
    values = ['a50', 'a51.', 'a50.31', 'a-50', 'a50.4', 'a5.034e1', 'a50.300']
    assert index_realsorted(values) == index_natsorted(values, alg=ns.REAL)
107 | ||
108 | ||
def test_index_humansorted_returns_results_identical_to_index_natsorted_with_LOCALE():
    """index_humansorted matches index_natsorted called with alg=ns.LOCALE."""
    words = ['Apple', 'corn', 'Corn', 'Banana', 'apple', 'banana']
    assert index_humansorted(words) == index_natsorted(words, alg=ns.LOCALE)
112 | ||
113 | ||
def test_order_by_index_sorts_list_according_to_order_of_integer_list():
    """order_by_index picks items in index order, like a list comprehension."""
    names = ['num3', 'num5', 'num2']
    order = [2, 0, 1]

    assert order_by_index(names, order) == ['num2', 'num3', 'num5']
    assert order_by_index(names, order) == [names[i] for i in order]
119 | ||
120 | ||
def test_order_by_index_returns_generator_with_iter_True():
    """With True as third argument the result is not a list, but lists to the same items."""
    names = ['num3', 'num5', 'num2']
    order = [2, 0, 1]

    lazy = order_by_index(names, order, True)
    assert lazy != [names[i] for i in order]
    assert list(order_by_index(names, order, True)) == [names[i] for i in order]
125 | assert list(order_by_index(a, index, True)) == [a[i] for i in index] |
0 | # -*- coding: utf-8 -*- | |
1 | """These test the utils.py functions.""" | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from natsort.ns_enum import ns | |
5 | from natsort.utils import _parse_bytes_factory | |
6 | from hypothesis import given | |
7 | from hypothesis.strategies import binary | |
8 | ||
9 | ||
10 | # Each test has an "example" version for demonstrative purposes, | |
11 | # and a test that uses the hypothesis module. | |
12 | ||
13 | ||
def test_parse_bytes_factory_makes_function_that_returns_tuple_example():
    """With no options the bytes parser wraps its input in a 1-tuple."""
    parse = _parse_bytes_factory(0)
    assert parse(b'hello') == (b'hello',)
16 | ||
17 | ||
@given(binary())
def test_parse_bytes_factory_makes_function_that_returns_tuple(x):
    """With no options any bytes value comes back wrapped in a 1-tuple."""
    parse = _parse_bytes_factory(0)
    assert parse(x) == (x,)
21 | ||
22 | ||
def test_parse_bytes_factory_with_IGNORECASE_makes_function_that_returns_tuple_with_lowercase_example():
    """With IGNORECASE the bytes are lowercased before being wrapped."""
    parse = _parse_bytes_factory(ns.IGNORECASE)
    assert parse(b'HelLo') == (b'hello',)
25 | ||
26 | ||
27 | @given(binary()) | |
28 | def test_parse_bytes_factory_with_IGNORECASE_makes_function_that_returns_tuple_with_lowercase(x): | |
29 | assert _parse_bytes_factory(ns.IGNORECASE)(x) == (x.lower(),) | |
30 | ||
31 | ||
32 | def test_parse_bytes_factory_with_PATH_makes_function_that_returns_nested_tuple_example(): | |
33 | assert _parse_bytes_factory(ns.PATH)(b'hello') == ((b'hello',),) | |
34 | ||
35 | ||
36 | @given(binary()) | |
37 | def test_parse_bytes_factory_with_PATH_makes_function_that_returns_nested_tuple(x): | |
38 | assert _parse_bytes_factory(ns.PATH)(x) == ((x,),) | |
39 | ||
40 | ||
41 | def test_parse_bytes_factory_with_PATH_and_IGNORECASE_makes_function_that_returns_nested_tuple_with_lowercase_example(): | |
42 | assert _parse_bytes_factory(ns.PATH | ns.IGNORECASE)(b'HelLo') == ((b'hello',),) | |
43 | ||
44 | ||
45 | @given(binary()) | |
46 | def test_parse_bytes_factory_with_PATH_and_IGNORECASE_makes_function_that_returns_nested_tuple_with_lowercase(x): | |
47 | assert _parse_bytes_factory(ns.PATH | ns.IGNORECASE)(x) == ((x.lower(),),) |
0 | # -*- coding: utf-8 -*- | |
1 | """These test the utils.py functions.""" | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from natsort.ns_enum import ns | |
5 | from natsort.utils import _parse_number_factory | |
6 | from hypothesis import ( | |
7 | given, | |
8 | ) | |
9 | from hypothesis.strategies import ( | |
10 | floats, | |
11 | integers, | |
12 | ) | |
13 | ||
14 | ||
15 | # Each test has an "example" version for demonstrative purposes, | |
16 | # and a test that uses the hypothesis module. | |
17 | ||
18 | ||
19 | def test_parse_number_factory_makes_function_that_returns_tuple_example(): | |
20 | assert _parse_number_factory(0, '', '')(57) == ('', 57) | |
21 | assert _parse_number_factory(0, '', '')(float('nan')) == ('', float('-inf')) | |
22 | assert _parse_number_factory(ns.NANLAST, '', '')(float('nan')) == ('', float('+inf')) | |
23 | ||
24 | ||
25 | @given(floats(allow_nan=False) | integers()) | |
26 | def test_parse_number_factory_makes_function_that_returns_tuple(x): | |
27 | assert _parse_number_factory(0, '', '')(x) == ('', x) | |
28 | ||
29 | ||
30 | def test_parse_number_factory_with_PATH_makes_function_that_returns_nested_tuple_example(): | |
31 | assert _parse_number_factory(ns.PATH, '', '')(57) == (('', 57),) | |
32 | ||
33 | ||
34 | @given(floats(allow_nan=False) | integers()) | |
35 | def test_parse_number_factory_with_PATH_makes_function_that_returns_nested_tuple(x): | |
36 | assert _parse_number_factory(ns.PATH, '', '')(x) == (('', x),) | |
37 | ||
38 | ||
39 | def test_parse_number_factory_with_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple_example(): | |
40 | assert _parse_number_factory(ns.UNGROUPLETTERS | ns.LOCALE, '', 'xx')(57) == (('xx',), ('', 57)) | |
41 | ||
42 | ||
43 | @given(floats(allow_nan=False) | integers()) | |
44 | def test_parse_number_factory_with_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple(x): | |
45 | assert _parse_number_factory(ns.UNGROUPLETTERS | ns.LOCALE, '', 'xx')(x) == (('xx',), ('', x)) | |
46 | ||
47 | ||
48 | def test_parse_number_factory_with_PATH_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple_example(): | |
49 | assert _parse_number_factory(ns.PATH | ns.UNGROUPLETTERS | ns.LOCALE, '', 'xx')(57) == ((('xx',), ('', 57)),) | |
50 | ||
51 | ||
52 | @given(floats(allow_nan=False) | integers()) | |
53 | def test_parse_number_factory_with_PATH_UNGROUPLETTERS_LOCALE_makes_function_that_returns_nested_tuple(x): | |
54 | assert _parse_number_factory(ns.PATH | ns.UNGROUPLETTERS | ns.LOCALE, '', 'xx')(x) == ((('xx',), ('', x)),) |
0 | # -*- coding: utf-8 -*- | |
1 | """These test the utils.py functions.""" | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from pytest import raises | |
5 | from natsort.ns_enum import ns | |
6 | from natsort.utils import ( | |
7 | _float_sign_exp_re, | |
8 | _float_nosign_exp_re, | |
9 | _float_sign_noexp_re, | |
10 | _float_nosign_noexp_re, | |
11 | _int_nosign_re, | |
12 | _int_sign_re, | |
13 | _parse_string_factory, | |
14 | _parse_path_factory, | |
15 | ) | |
16 | from natsort.compat.py23 import py23_str, PY_VERSION | |
17 | from natsort.compat.fastnumbers import ( | |
18 | fast_float, | |
19 | fast_int, | |
20 | ) | |
21 | from slow_splitters import ( | |
22 | int_splitter, | |
23 | float_splitter, | |
24 | ) | |
25 | from hypothesis import ( | |
26 | given, | |
27 | example, | |
28 | ) | |
29 | from hypothesis.strategies import ( | |
30 | lists, | |
31 | text, | |
32 | floats, | |
33 | integers, | |
34 | ) | |
35 | ||
36 | if PY_VERSION >= 3: | |
37 | long = int | |
38 | ||
39 | ||
40 | def whitespace_check(x): | |
41 | """Simplifies testing""" | |
42 | try: | |
43 | if x.isspace(): | |
44 | return x in ' \t\n\r\f\v' | |
45 | else: | |
46 | return True | |
47 | except (AttributeError, TypeError): | |
48 | return True | |
49 | ||
50 | ||
51 | def no_op(x): | |
52 | """A function that does nothing.""" | |
53 | return x | |
54 | ||
55 | ||
56 | def tuple2(x, dummy): | |
57 | """Make the input a tuple.""" | |
58 | return tuple(x) | |
59 | ||
60 | ||
61 | # Each test has an "example" version for demonstrative purposes, | |
62 | # and a test that uses the hypothesis module. | |
63 | ||
64 | ||
65 | def test_parse_string_factory_raises_TypeError_if_given_a_number_example(): | |
66 | with raises(TypeError): | |
67 | assert _parse_string_factory(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)(50.0) | |
68 | ||
69 | ||
70 | @given(floats()) | |
71 | def test_parse_string_factory_raises_TypeError_if_given_a_number(x): | |
72 | with raises(TypeError): | |
73 | assert _parse_string_factory(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)(x) | |
74 | ||
75 | ||
76 | def test_parse_string_factory_only_parses_digits_with_nosign_int_example(): | |
77 | assert _parse_string_factory(0, '', _int_nosign_re.split, no_op, fast_int, tuple2)('a5+5.034e-1') == ('a', 5, '+', 5, '.', 34, 'e-', 1) | |
78 | ||
79 | ||
80 | @given(lists(elements=floats() | text().filter(whitespace_check) | integers(), min_size=1, max_size=10)) | |
81 | @example([10000000000000000000000000000000000000000000000000000000000000000000000000, | |
82 | 100000000000000000000000000000000000000000000000000000000000000000000000000, | |
83 | 100000000000000000000000000000000000000000000000000000000000000000000000000]) | |
84 | def test_parse_string_factory_only_parses_digits_with_nosign_int(x): | |
85 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
86 | assert _parse_string_factory(0, '', _int_nosign_re.split, no_op, fast_int, tuple2)(s) == int_splitter(s, False, '') | |
87 | ||
88 | ||
89 | def test_parse_string_factory_parses_digit_with_sign_with_signed_int_example(): | |
90 | assert _parse_string_factory(0, '', _int_sign_re.split, no_op, fast_int, tuple2)('a5+5.034e-1') == ('a', 5, '', 5, '.', 34, 'e', -1) | |
91 | ||
92 | ||
93 | @given(lists(elements=floats() | text().filter(whitespace_check) | integers(), min_size=1, max_size=10)) | |
94 | def test_parse_string_factory_parses_digit_with_sign_with_signed_int(x): | |
95 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
96 | assert _parse_string_factory(0, '', _int_sign_re.split, no_op, fast_int, tuple2)(s) == int_splitter(s, True, '') | |
97 | ||
98 | ||
99 | def test_parse_string_factory_only_parses_float_with_nosign_noexp_float_example(): | |
100 | assert _parse_string_factory(0, '', _float_nosign_noexp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '+', 5.034, 'e-', 1.0) | |
101 | ||
102 | ||
103 | @given(lists(elements=floats(allow_nan=False) | text().filter(whitespace_check) | integers(), min_size=1, max_size=10)) | |
104 | def test_parse_string_factory_only_parses_float_with_nosign_noexp_float(x): | |
105 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
106 | assert _parse_string_factory(0, '', _float_nosign_noexp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, False, False, '') | |
107 | ||
108 | ||
109 | def test_parse_string_factory_only_parses_float_with_exponent_with_nosign_exp_float_example(): | |
110 | assert _parse_string_factory(0, '', _float_nosign_exp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '+', 0.5034) | |
111 | ||
112 | ||
113 | @given(lists(elements=floats(allow_nan=False) | text().filter(whitespace_check) | integers(), min_size=1, max_size=10)) | |
114 | def test_parse_string_factory_only_parses_float_with_exponent_with_nosign_exp_float(x): | |
115 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
116 | assert _parse_string_factory(0, '', _float_nosign_exp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, False, True, '') | |
117 | ||
118 | ||
119 | def test_parse_string_factory_only_parses_float_with_sign_with_sign_noexp_float_example(): | |
120 | assert _parse_string_factory(0, '', _float_sign_noexp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '', 5.034, 'e', -1.0) | |
121 | ||
122 | ||
123 | @given(lists(elements=floats(allow_nan=False) | text().filter(whitespace_check) | integers(), min_size=1, max_size=10)) | |
124 | def test_parse_string_factory_only_parses_float_with_sign_with_sign_noexp_float(x): | |
125 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
126 | assert _parse_string_factory(0, '', _float_sign_noexp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, True, False, '') | |
127 | ||
128 | ||
129 | def test_parse_string_factory_parses_float_with_sign_exp_float_example(): | |
130 | assert _parse_string_factory(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)('a5+5.034e-1') == ('a', 5.0, '', 0.5034) | |
131 | assert _parse_string_factory(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)('6a5+5.034e-1') == ('', 6.0, 'a', 5.0, '', 0.5034) | |
132 | ||
133 | ||
134 | @given(lists(elements=floats(allow_nan=False) | text().filter(whitespace_check) | integers(), min_size=1, max_size=10)) | |
135 | def test_parse_string_factory_parses_float_with_sign_exp_float(x): | |
136 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
137 | assert _parse_string_factory(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2)(s) == float_splitter(s, True, True, '') | |
138 | ||
139 | ||
140 | def test_parse_string_factory_selects_pre_function_value_if_not_dumb(): | |
141 | def tuple2(x, orig): | |
142 | """Make the input a tuple.""" | |
143 | return (orig[0], tuple(x)) | |
144 | assert _parse_string_factory(0, '', _int_nosign_re.split, py23_str.upper, fast_float, tuple2)('a5+5.034e-1') == ('A', ('A', 5, '+', 5, '.', 34, 'E-', 1)) | |
145 | assert _parse_string_factory(ns._DUMB, '', _int_nosign_re.split, py23_str.upper, fast_float, tuple2)('a5+5.034e-1') == ('A', ('A', 5, '+', 5, '.', 34, 'E-', 1)) | |
146 | assert _parse_string_factory(ns.LOCALE, '', _int_nosign_re.split, py23_str.upper, fast_float, tuple2)('a5+5.034e-1') == ('A', ('A', 5, '+', 5, '.', 34, 'E-', 1)) | |
147 | assert _parse_string_factory(ns.LOCALE | ns._DUMB, '', _int_nosign_re.split, py23_str.upper, fast_float, tuple2)('a5+5.034e-1') == ('a', ('A', 5, '+', 5, '.', 34, 'E-', 1)) | |
148 | ||
149 | ||
150 | def test_parse_path_function_parses_string_as_path_then_as_string(): | |
151 | splt = _parse_string_factory(0, '', _float_sign_exp_re.split, no_op, fast_float, tuple2) | |
152 | assert _parse_path_factory(splt)('/p/Folder (10)/file34.5nm (2).tar.gz') == (('/',), ('p',), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) | |
153 | assert _parse_path_factory(splt)('../Folder (10)/file (2).tar.gz') == (('..',), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) | |
154 | assert _parse_path_factory(splt)('Folder (10)/file.f34.5nm (2).tar.gz') == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) |
0 | # -*- coding: utf-8 -*- | |
1 | """These test the utils.py functions.""" | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from natsort.ns_enum import ns | |
5 | from natsort.utils import ( | |
6 | _string_component_transform_factory, | |
7 | _groupletters, | |
8 | ) | |
9 | from natsort.compat.py23 import py23_str | |
10 | from natsort.compat.locale import get_strxfrm | |
11 | from natsort.compat.fastnumbers import ( | |
12 | fast_float, | |
13 | fast_int, | |
14 | ) | |
15 | from hypothesis import ( | |
16 | given, | |
17 | ) | |
18 | from hypothesis.strategies import ( | |
19 | text, | |
20 | floats, | |
21 | integers, | |
22 | ) | |
23 | from compat.locale import bad_uni_chars | |
24 | ||
25 | ||
26 | def no_null(x): | |
27 | return '\0' not in x | |
28 | ||
29 | ||
30 | # Each test has an "example" version for demonstrative purposes, | |
31 | # and a test that uses the hypothesis module. | |
32 | ||
33 | ||
34 | def test_string_component_transform_factory_returns_fast_int_example(): | |
35 | x = 'hello' | |
36 | assert _string_component_transform_factory(0)(x) is fast_int(x) | |
37 | assert _string_component_transform_factory(0)('5007') == fast_int('5007') | |
38 | ||
39 | ||
40 | @given(text().filter(bool) | floats() | integers()) | |
41 | def test_string_component_transform_factory_returns_fast_int(x): | |
42 | assert _string_component_transform_factory(0)(py23_str(x)) == fast_int(py23_str(x)) | |
43 | ||
44 | ||
45 | def test_string_component_transform_factory_with_FLOAT_returns_fast_float_example(): | |
46 | x = 'hello' | |
47 | assert _string_component_transform_factory(ns.FLOAT)(x) is fast_float(x) | |
48 | assert _string_component_transform_factory(ns.FLOAT)('5007') == fast_float('5007') | |
49 | ||
50 | ||
51 | @given(text().filter(bool) | floats() | integers()) | |
52 | def test_string_component_transform_factory_with_FLOAT_returns_fast_float(x): | |
53 | assert _string_component_transform_factory(ns.FLOAT)(py23_str(x)) == fast_float(py23_str(x), nan=float('-inf')) | |
54 | ||
55 | ||
56 | def test_string_component_transform_factory_with_FLOAT_returns_fast_float_with_neg_inf_replacing_nan(): | |
57 | assert _string_component_transform_factory(ns.FLOAT)('nan') == fast_float('nan', nan=float('-inf')) | |
58 | ||
59 | ||
60 | def test_string_component_transform_factory_with_FLOAT_and_NANLAST_returns_fast_float_with_pos_inf_replacing_nan(): | |
61 | assert _string_component_transform_factory(ns.FLOAT | ns.NANLAST)('nan') == fast_float('nan', nan=float('+inf')) | |
62 | ||
63 | ||
64 | def test_string_component_transform_factory_with_GROUPLETTERS_returns_fast_int_and_groupletters_example(): | |
65 | x = 'hello' | |
66 | assert _string_component_transform_factory(ns.GROUPLETTERS)(x) == fast_int(x, key=_groupletters) | |
67 | ||
68 | ||
69 | @given(text().filter(bool)) | |
70 | def test_string_component_transform_factory_with_GROUPLETTERS_returns_fast_int_and_groupletters(x): | |
71 | assert _string_component_transform_factory(ns.GROUPLETTERS)(x) == fast_int(x, key=_groupletters) | |
72 | ||
73 | ||
74 | def test_string_component_transform_factory_with_LOCALE_returns_fast_int_and_groupletters_example(): | |
75 | x = 'hello' | |
76 | assert _string_component_transform_factory(ns.LOCALE)(x) == fast_int(x, key=get_strxfrm()) | |
77 | ||
78 | ||
79 | @given(text().filter(bool).filter(lambda x: not any(y in bad_uni_chars for y in x)).filter(no_null)) | |
80 | def test_string_component_transform_factory_with_LOCALE_returns_fast_int_and_groupletters(x): | |
81 | assert _string_component_transform_factory(ns.LOCALE)(x) == fast_int(x, key=get_strxfrm()) | |
82 | ||
83 | ||
84 | def test_string_component_transform_factory_with_LOCALE_and_GROUPLETTERS_returns_fast_int_and_groupletters_and_locale_convert_example(): | |
85 | x = 'hello' | |
86 | assert _string_component_transform_factory(ns.GROUPLETTERS | ns.LOCALE)(x) == fast_int(x, key=lambda x: get_strxfrm()(_groupletters(x))) | |
87 | ||
88 | ||
89 | @given(text().filter(bool).filter(no_null)) | |
90 | def test_string_component_transform_factory_with_LOCALE_and_GROUPLETTERS_returns_fast_int_and_groupletters_and_locale_convert(x): | |
91 | try: | |
92 | assert _string_component_transform_factory(ns.GROUPLETTERS | ns.LOCALE)(x) == fast_int(x, key=lambda x: get_strxfrm()(_groupletters(x))) | |
93 | except ValueError as e: # handle broken locale lib on BSD. | |
94 | if 'is not in range' not in str(e): | |
95 | raise | |
96 | ||
97 | ||
98 | def test_string_component_transform_factory_with_LOCALE_and_DUMB_returns_fast_int_and_groupletters_and_locale_convert_example(): | |
99 | x = 'hello' | |
100 | assert _string_component_transform_factory(ns._DUMB | ns.LOCALE)(x) == fast_int(x, key=lambda x: get_strxfrm()(_groupletters(x))) | |
101 | ||
102 | ||
103 | @given(text().filter(bool).filter(no_null)) | |
104 | def test_string_component_transform_factory_with_LOCALE_and_DUMB_returns_fast_int_and_groupletters_and_locale_convert(x): | |
105 | try: | |
106 | assert _string_component_transform_factory(ns._DUMB | ns.LOCALE)(x) == fast_int(x, key=lambda x: get_strxfrm()(_groupletters(x))) | |
107 | except ValueError as e: # handle broken locale lib on BSD. | |
108 | if 'is not in range' not in str(e): | |
109 | raise |
5 | 5 | import unicodedata |
6 | 6 | from natsort.compat.py23 import py23_range, py23_unichr |
7 | 7 | from natsort.unicode_numbers import ( |
8 | numeric_hex, | |
8 | 9 | numeric_chars, |
9 | 10 | numeric, |
10 | 11 | digit_chars, |
11 | 12 | digits, |
13 | decimal_chars, | |
14 | decimals, | |
15 | digits_no_decimals, | |
16 | numeric_no_decimals, | |
12 | 17 | ) |
13 | 18 | |
14 | 19 | |
22 | 27 | assert unicodedata.digit(a, None) is not None |
23 | 28 | |
24 | 29 | |
25 | def test_numeric_chars_contains_all_valid_unicode_numeric_characters(): | |
26 | for i in py23_range(0X10FFFF): | |
30 | def test_decimal_chars_contains_only_valid_unicode_decimal_characters(): | |
31 | for a in decimal_chars: | |
32 | assert unicodedata.decimal(a, None) is not None | |
33 | ||
34 | ||
35 | def test_numeric_chars_contains_all_valid_unicode_numeric_and_digit_characters(): | |
36 | set_numeric_hex = set(numeric_hex) | |
37 | set_numeric_chars = set(numeric_chars) | |
38 | set_digit_chars = set(digit_chars) | |
39 | set_decimal_chars = set(decimal_chars) | |
40 | for i in py23_range(0X110000): | |
27 | 41 | try: |
28 | 42 | a = py23_unichr(i) |
29 | 43 | except ValueError: |
31 | 45 | if a in set('0123456789'): |
32 | 46 | continue |
33 | 47 | if unicodedata.numeric(a, None) is not None: |
34 | assert a in numeric_chars | |
48 | assert i in set_numeric_hex | |
49 | assert a in set_numeric_chars | |
50 | if unicodedata.digit(a, None) is not None: | |
51 | assert i in set_numeric_hex | |
52 | assert a in set_digit_chars | |
53 | if unicodedata.decimal(a, None) is not None: | |
54 | assert i in set_numeric_hex | |
55 | assert a in set_decimal_chars | |
35 | 56 | |
57 | assert set_decimal_chars.isdisjoint(digits_no_decimals) | |
58 | assert set_digit_chars.issuperset(digits_no_decimals) | |
36 | 59 | |
37 | def test_digit_chars_contains_all_valid_unicode_digit_characters(): | |
38 | for i in py23_range(0X10FFFF): | |
39 | try: | |
40 | a = py23_unichr(i) | |
41 | except ValueError: | |
42 | break | |
43 | if a in set('0123456789'): | |
44 | continue | |
45 | if unicodedata.digit(a, None) is not None: | |
46 | assert a in digit_chars | |
60 | assert set_decimal_chars.isdisjoint(numeric_no_decimals) | |
61 | assert set_numeric_chars.issuperset(numeric_no_decimals) | |
47 | 62 | |
48 | 63 | |
49 | 64 | def test_combined_string_contains_all_characters_in_list(): |
50 | 65 | assert numeric == ''.join(numeric_chars) |
51 | 66 | assert digits == ''.join(digit_chars) |
67 | assert decimals == ''.join(decimal_chars) |
1 | 1 | """These test the utils.py functions.""" |
2 | 2 | from __future__ import unicode_literals |
3 | 3 | |
4 | import sys | |
5 | import locale | |
6 | 4 | import pathlib |
7 | import pytest | |
8 | 5 | import string |
9 | from math import isnan | |
10 | from operator import itemgetter | |
11 | 6 | from itertools import chain |
7 | from operator import neg as op_neg | |
12 | 8 | from pytest import raises |
13 | 9 | from natsort.ns_enum import ns |
14 | 10 | from natsort.utils import ( |
15 | _number_extracter, | |
16 | _py3_safe, | |
17 | _natsort_key, | |
11 | _sep_inserter, | |
18 | 12 | _args_to_enum, |
13 | _regex_chooser, | |
19 | 14 | _float_sign_exp_re, |
20 | 15 | _float_nosign_exp_re, |
21 | 16 | _float_sign_noexp_re, |
24 | 19 | _int_sign_re, |
25 | 20 | _do_decoding, |
26 | 21 | _path_splitter, |
27 | _fix_nan, | |
28 | ) | |
29 | from natsort.locale_help import locale_convert | |
30 | from natsort.compat.py23 import py23_str | |
31 | from natsort.compat.locale import ( | |
32 | use_pyicu, | |
33 | null_string, | |
34 | dumb_sort, | |
35 | ) | |
36 | from natsort.compat.fastnumbers import ( | |
37 | fast_float, | |
38 | fast_int, | |
39 | isint, | |
40 | ) | |
22 | _groupletters, | |
23 | chain_functions, | |
24 | ) | |
25 | from natsort.compat.py23 import py23_str, py23_cmp | |
26 | from natsort.compat.locale import null_string_locale | |
41 | 27 | from slow_splitters import ( |
42 | int_splitter, | |
43 | float_splitter, | |
44 | 28 | sep_inserter, |
45 | ) | |
46 | from compat.locale import ( | |
47 | load_locale, | |
48 | get_strxfrm, | |
49 | low, | |
50 | ) | |
51 | from compat.hypothesis import ( | |
52 | assume, | |
29 | add_leading_space_if_first_is_num, | |
30 | ) | |
31 | from compat.locale import low | |
32 | from hypothesis import ( | |
53 | 33 | given, |
54 | example, | |
34 | ) | |
35 | from hypothesis.strategies import ( | |
55 | 36 | sampled_from, |
56 | use_hypothesis, | |
57 | ) | |
58 | ||
59 | if sys.version[0] == '3': | |
60 | long = int | |
61 | ||
62 | ichain = chain.from_iterable | |
37 | lists, | |
38 | text, | |
39 | integers, | |
40 | ) | |
63 | 41 | |
64 | 42 | |
65 | 43 | def test_do_decoding_decodes_bytes_string_to_unicode(): |
129 | 107 | assert _args_to_enum(**{'number_type': None, |
130 | 108 | 'exp': True}) == ns.I | ns.U |
131 | 109 | |
132 | float_nosafe_locale_group = (fast_float, False, True, True) | |
133 | float_nosafe_locale_nogroup = (fast_float, False, True, False) | |
134 | float_safe_nolocale_nogroup = (fast_float, True, False, False) | |
135 | float_nosafe_nolocale_group = (fast_float, False, False, True) | |
136 | float_nosafe_nolocale_nogroup = (fast_float, False, False, False) | |
137 | int_safe_locale_group = (fast_int, True, True, True) | |
138 | int_safe_locale_nogroup = (fast_int, True, True, False) | |
139 | int_safe_nolocale_group = (fast_int, True, False, True) | |
140 | int_safe_nolocale_nogroup = (fast_int, True, False, False) | |
141 | int_nosafe_locale_group = (fast_int, False, True, True) | |
142 | int_nosafe_locale_nogroup = (fast_int, False, True, False) | |
143 | int_nosafe_nolocale_group = (fast_int, False, False, True) | |
144 | int_nosafe_nolocale_nogroup = (fast_int, False, False, False) | |
145 | ||
146 | ||
147 | def test_fix_nan_converts_nan_to_negative_infinity_without_NANLAST(): | |
148 | assert _fix_nan((float('nan'),), 0) == (float('-inf'),) | |
149 | assert _fix_nan(('a', 'b', float('nan')), 0) == ('a', 'b', float('-inf')) | |
150 | ||
151 | ||
152 | def test_fix_nan_converts_nan_to_positive_infinity_with_NANLAST(): | |
153 | assert _fix_nan((float('nan'),), ns.NANLAST) == (float('+inf'),) | |
154 | assert _fix_nan(('a', 'b', float('nan')), ns.NANLAST) == ('a', 'b', float('+inf')) | |
110 | ||
111 | def test_regex_chooser_returns_correct_regular_expression_object(): | |
112 | assert _regex_chooser[ns.INT] is _int_nosign_re | |
113 | assert _regex_chooser[ns.INT | ns.NOEXP] is _int_nosign_re | |
114 | assert _regex_chooser[ns.INT | ns.SIGNED] is _int_sign_re | |
115 | assert _regex_chooser[ns.INT | ns.SIGNED | ns.NOEXP] is _int_sign_re | |
116 | assert _regex_chooser[ns.FLOAT] is _float_nosign_exp_re | |
117 | assert _regex_chooser[ns.FLOAT | ns.NOEXP] is _float_nosign_noexp_re | |
118 | assert _regex_chooser[ns.FLOAT | ns.SIGNED] is _float_sign_exp_re | |
119 | assert _regex_chooser[ns.FLOAT | ns.SIGNED | ns.NOEXP] is _float_sign_noexp_re | |
120 | ||
121 | ||
122 | def test_ns_enum_values_have_are_as_expected(): | |
123 | # Defaults | |
124 | assert ns.TYPESAFE == 0 | |
125 | assert ns.INT == 0 | |
126 | assert ns.VERSION == 0 | |
127 | assert ns.DIGIT == 0 | |
128 | assert ns.UNSIGNED == 0 | |
129 | ||
130 | # Aliases | |
131 | assert ns.TYPESAFE == ns.T | |
132 | assert ns.INT == ns.I | |
133 | assert ns.VERSION == ns.V | |
134 | assert ns.DIGIT == ns.D | |
135 | assert ns.UNSIGNED == ns.U | |
136 | assert ns.FLOAT == ns.F | |
137 | assert ns.SIGNED == ns.S | |
138 | assert ns.NOEXP == ns.N | |
139 | assert ns.PATH == ns.P | |
140 | assert ns.LOCALEALPHA == ns.LA | |
141 | assert ns.LOCALENUM == ns.LN | |
142 | assert ns.LOCALE == ns.L | |
143 | assert ns.IGNORECASE == ns.IC | |
144 | assert ns.LOWERCASEFIRST == ns.LF | |
145 | assert ns.GROUPLETTERS == ns.G | |
146 | assert ns.UNGROUPLETTERS == ns.UG | |
147 | assert ns.CAPITALFIRST == ns.C | |
148 | assert ns.UNGROUPLETTERS == ns.CAPITALFIRST | |
149 | assert ns.NANLAST == ns.NL | |
150 | assert ns.COMPATIBILITYNORMALIZE == ns.CN | |
151 | assert ns.NUMAFTER == ns.NA | |
152 | ||
153 | # Convenience | |
154 | assert ns.LOCALE == ns.LOCALEALPHA | ns.LOCALENUM | |
155 | assert ns.REAL == ns.FLOAT | ns.SIGNED | |
156 | assert ns._NUMERIC_ONLY == ns.REAL | ns.NOEXP | |
157 | ||
158 | ||
159 | def test_chain_functions_is_a_no_op_if_no_functions_are_given(): | |
160 | x = 2345 | |
161 | assert chain_functions([])(x) is x | |
162 | ||
163 | ||
164 | def test_chain_functions_does_one_function_if_one_function_is_given(): | |
165 | x = '2345' | |
166 | assert chain_functions([len])(x) == 4 | |
167 | ||
168 | ||
169 | def test_chain_functions_combines_functions_in_given_order(): | |
170 | x = 2345 | |
171 | assert chain_functions([str, len, op_neg])(x) == -len(str(x)) | |
155 | 172 | |
156 | 173 | |
157 | 174 | # Each test has an "example" version for demonstrative purposes, |
158 | 175 | # and a test that uses the hypothesis module. |
159 | 176 | |
160 | ||
161 | def test_py3_safe_does_nothing_if_no_numbers_example(): | |
162 | assert _py3_safe(['a', 'b', 'c'], False, isint) == ['a', 'b', 'c'] | |
163 | assert _py3_safe(['a'], False, isint) == ['a'] | |
164 | ||
165 | ||
166 | def test_py3_safe_does_nothing_if_only_one_number_example(): | |
167 | assert _py3_safe(['a', 5], False, isint) == ['a', 5] | |
168 | ||
169 | ||
170 | def test_py3_safe_inserts_empty_string_between_two_numbers_example(): | |
171 | assert _py3_safe([5, 9], False, isint) == [5, '', 9] | |
172 | ||
173 | ||
174 | def test_py3_safe_with_use_locale_inserts_null_string_between_two_numbers_example(): | |
175 | assert _py3_safe([5, 9], True, isint) == [5, null_string, 9] | |
176 | ||
177 | ||
178 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
179 | @given([py23_str, int]) | |
180 | def test_py3_safe_inserts_empty_string_between_two_numbers(x): | |
181 | assume(bool(x)) | |
182 | assert _py3_safe(x, False, isint) == sep_inserter(x, (int, long), '') | |
177 | def test_groupletters_returns_letters_with_lowercase_transform_of_letter_example(): | |
178 | assert _groupletters('HELLO') == 'hHeElLlLoO' | |
179 | assert _groupletters('hello') == 'hheelllloo' | |
180 | ||
181 | ||
182 | @given(text().filter(bool)) | |
183 | def test_groupeletters_returns_letters_with_lowercase_transform_of_letter(x): | |
184 | assert _groupletters(x) == ''.join(chain.from_iterable([low(y), y] for y in x)) | |
185 | ||
186 | ||
187 | def test_sep_inserter_does_nothing_if_no_numbers_example(): | |
188 | assert list(_sep_inserter(iter(['a', 'b', 'c']), '')) == ['a', 'b', 'c'] | |
189 | assert list(_sep_inserter(iter(['a']), '')) == ['a'] | |
190 | ||
191 | ||
192 | def test_sep_inserter_does_nothing_if_only_one_number_example(): | |
193 | assert list(_sep_inserter(iter(['a', 5]), '')) == ['a', 5] | |
194 | ||
195 | ||
196 | def test_sep_inserter_inserts_separator_string_between_two_numbers_example(): | |
197 | assert list(_sep_inserter(iter([5, 9]), '')) == ['', 5, '', 9] | |
198 | assert list(_sep_inserter(iter([5, 9]), null_string_locale)) == [null_string_locale, 5, null_string_locale, 9] | |
199 | ||
200 | ||
201 | @given(lists(elements=text().filter(bool) | integers())) | |
202 | def test_sep_inserter_inserts_separator_between_two_numbers(x): | |
203 | assert list(_sep_inserter(iter(x), '')) == list(add_leading_space_if_first_is_num(sep_inserter(x, ''), '')) | |
183 | 204 | |
184 | 205 | |
185 | 206 | def test_path_splitter_splits_path_string_by_separator_example(): |
186 | 207 | z = '/this/is/a/path' |
187 | assert _path_splitter(z) == list(pathlib.Path(z).parts) | |
188 | ||
189 | ||
190 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
191 | @given([sampled_from(string.ascii_letters)]) | |
208 | assert tuple(_path_splitter(z)) == tuple(pathlib.Path(z).parts) | |
209 | z = pathlib.Path('/this/is/a/path') | |
210 | assert tuple(_path_splitter(z)) == tuple(pathlib.Path(z).parts) | |
211 | ||
212 | ||
213 | @given(lists(sampled_from(string.ascii_letters), min_size=2).filter(all)) | |
192 | 214 | def test_path_splitter_splits_path_string_by_separator(x): |
193 | assume(len(x) > 1) | |
194 | assume(all(x)) | |
195 | 215 | z = py23_str(pathlib.Path(*x)) |
196 | assert _path_splitter(z) == list(pathlib.Path(z).parts) | |
216 | assert tuple(_path_splitter(z)) == tuple(pathlib.Path(z).parts) | |
197 | 217 | |
198 | 218 | |
199 | 219 | def test_path_splitter_splits_path_string_by_separator_and_removes_extension_example(): |
200 | 220 | z = '/this/is/a/path/file.exe' |
201 | y = list(pathlib.Path(z).parts) | |
202 | assert _path_splitter(z) == y[:-1] + [pathlib.Path(z).stem] + [pathlib.Path(z).suffix] | |
203 | ||
204 | ||
205 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
206 | @given([sampled_from(string.ascii_letters)]) | |
221 | y = tuple(pathlib.Path(z).parts) | |
222 | assert tuple(_path_splitter(z)) == y[:-1] + (pathlib.Path(z).stem, pathlib.Path(z).suffix) | |
223 | ||
224 | ||
225 | @given(lists(sampled_from(string.ascii_letters), min_size=3).filter(all)) | |
207 | 226 | def test_path_splitter_splits_path_string_by_separator_and_removes_extension(x): |
208 | assume(len(x) > 2) | |
209 | assume(all(x)) | |
210 | 227 | z = py23_str(pathlib.Path(*x[:-2])) + '.' + x[-1] |
211 | y = list(pathlib.Path(z).parts) | |
212 | assert _path_splitter(z) == y[:-1] + [pathlib.Path(z).stem] + [pathlib.Path(z).suffix] | |
213 | ||
214 | ||
215 | def test_number_extracter_raises_TypeError_if_given_a_number_example(): | |
216 | with raises(TypeError): | |
217 | assert _number_extracter(50.0, _float_sign_exp_re, *float_nosafe_nolocale_nogroup) | |
218 | ||
219 | ||
220 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
221 | @given(float) | |
222 | def test_number_extracter_raises_TypeError_if_given_a_number(x): | |
223 | with raises(TypeError): | |
224 | assert _number_extracter(x, _float_sign_exp_re, *float_nosafe_nolocale_nogroup) | |
225 | ||
226 | ||
227 | def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats_example(): | |
228 | assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, 0.5034] | |
229 | ||
230 | ||
231 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
232 | @given([float, py23_str, int]) | |
233 | def test_number_extracter_includes_plus_sign_and_exponent_in_float_definition_for_signed_exp_floats(x): | |
234 | assume(len(x) <= 10) | |
235 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
236 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
237 | assert _number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, True, True, False, '') | |
238 | ||
239 | ||
240 | def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats_example(): | |
241 | assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, '+', 0.5034] | |
242 | ||
243 | ||
244 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
245 | @given([float, py23_str, int]) | |
246 | def test_number_extracter_excludes_plus_sign_in_float_definition_but_includes_exponent_for_unsigned_exp_floats(x): | |
247 | assume(len(x) <= 10) | |
248 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
249 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
250 | assert _number_extracter(s, _float_nosign_exp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, False, True, False, '') | |
251 | ||
252 | ||
253 | def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats_example(): | |
254 | assert _number_extracter('a5+5.034e-1', _float_sign_noexp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, 5.034, 'e', -1.0] | |
255 | ||
256 | ||
257 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
258 | @given([float, py23_str, int]) | |
259 | def test_number_extracter_includes_plus_and_minus_sign_in_float_definition_but_excludes_exponent_for_signed_noexp_floats(x): | |
260 | assume(len(x) <= 10) | |
261 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
262 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
263 | assert _number_extracter(s, _float_sign_noexp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, True, False, False, '') | |
264 | ||
265 | ||
266 | def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats_example(): | |
267 | assert _number_extracter('a5+5.034e-1', _float_nosign_noexp_re, *float_nosafe_nolocale_nogroup) == ['a', 5.0, '+', 5.034, 'e-', 1.0] | |
268 | ||
269 | ||
270 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
271 | @given([float, py23_str, int]) | |
272 | def test_number_extracter_excludes_plus_sign_and_exponent_in_float_definition_for_unsigned_noexp_floats(x): | |
273 | assume(len(x) <= 10) | |
274 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
275 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
276 | assert _number_extracter(s, _float_nosign_noexp_re, *float_nosafe_nolocale_nogroup) == float_splitter(s, False, False, False, '') | |
277 | ||
278 | ||
279 | def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints_example(): | |
280 | assert _number_extracter('a5+5.034e-1', _int_nosign_re, *int_nosafe_nolocale_nogroup) == ['a', 5, '+', 5, '.', 34, 'e-', 1] | |
281 | ||
282 | ||
283 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
284 | @given([float, py23_str, int]) | |
285 | @example([10000000000000000000000000000000000000000000000000000000000000000000000000, | |
286 | 100000000000000000000000000000000000000000000000000000000000000000000000000, | |
287 | 100000000000000000000000000000000000000000000000000000000000000000000000000]) | |
288 | def test_number_extracter_excludes_plus_and_minus_sign_in_int_definition_for_unsigned_ints(x): | |
289 | assume(len(x) <= 10) | |
290 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
291 | assert _number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup) == int_splitter(s, False, False, '') | |
292 | ||
293 | ||
294 | def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints_example(): | |
295 | assert _number_extracter('a5+5.034e-1', _int_sign_re, *int_nosafe_nolocale_nogroup) == ['a', 5, 5, '.', 34, 'e', -1] | |
296 | ||
297 | ||
298 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
299 | @given([float, py23_str, int]) | |
300 | def test_number_extracter_includes_plus_and_minus_sign_in_int_definition_for_signed_ints(x): | |
301 | assume(len(x) <= 10) | |
302 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
303 | assert _number_extracter(s, _int_sign_re, *int_nosafe_nolocale_nogroup) == int_splitter(s, True, False, '') | |
304 | ||
305 | ||
306 | def test_number_extracter_inserts_empty_string_between_floats_for_py3safe_option_example(): | |
307 | assert _number_extracter('a5+5.034e-1', _float_sign_exp_re, *float_safe_nolocale_nogroup) == ['a', 5.0, '', 0.5034] | |
308 | ||
309 | ||
310 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
311 | @given([float, py23_str, int]) | |
312 | def test_number_extracter_inserts_empty_string_between_floats_for_py3safe_option(x): | |
313 | assume(len(x) <= 10) | |
314 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
315 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
316 | assert _number_extracter(s, _float_sign_exp_re, *float_safe_nolocale_nogroup) == float_splitter(s, True, True, True, '') | |
317 | ||
318 | ||
319 | def test_number_extracter_inserts_empty_string_between_ints_for_py3safe_option_example(): | |
320 | assert _number_extracter('a5+5.034e-1', _int_sign_re, *int_safe_nolocale_nogroup) == ['a', 5, '', 5, '.', 34, 'e', -1] | |
321 | ||
322 | ||
323 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
324 | @given([float, py23_str, int]) | |
325 | def test_number_extracter_inserts_empty_string_between_ints_for_py3safe_option(x): | |
326 | assume(len(x) <= 10) | |
327 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
328 | assert _number_extracter(s, _int_sign_re, *int_safe_nolocale_nogroup) == int_splitter(s, True, True, '') | |
329 | ||
330 | ||
331 | def test_number_extracter_inserts_no_empty_string_py3safe_option_because_no_numbers_are_adjascent_example(): | |
332 | assert _number_extracter('a5+5.034e-1', _float_nosign_exp_re, *float_safe_nolocale_nogroup) == ['a', 5.0, '+', 0.5034] | |
333 | ||
334 | ||
335 | def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number_example(): | |
336 | assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *float_nosafe_nolocale_nogroup) == ['', 6.0, 'a', 5.0, 0.5034] | |
337 | ||
338 | ||
339 | def test_number_extracter_adds_leading_empty_string_if_input_begins_with_a_number_and_empty_string_between_numbers_for_py3safe_exmple(): | |
340 | assert _number_extracter('6a5+5.034e-1', _float_sign_exp_re, *float_safe_nolocale_nogroup) == ['', 6.0, 'a', 5.0, '', 0.5034] | |
341 | ||
342 | ||
343 | def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float_example(): | |
344 | assert _number_extracter('A5+5.034E-1', _float_sign_exp_re, *float_nosafe_nolocale_group) == ['aA', 5.0, 0.5034] | |
345 | ||
346 | ||
347 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
348 | @given([float, py23_str, int]) | |
349 | def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_float(x): | |
350 | assume(len(x) <= 10) | |
351 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
352 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
353 | t = float_splitter(s, True, True, False, '') | |
354 | t = [''.join([low(z) + z for z in y]) if type(y) != float else y for y in t] | |
355 | assert _number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_group) == t | |
356 | ||
357 | ||
358 | def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int_example(): | |
359 | assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nosafe_nolocale_group) == ['aA', 5, '++', 5, '..', 34, 'eE--', 1] | |
360 | ||
361 | ||
362 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
363 | @given([float, py23_str, int]) | |
364 | def test_number_extracter_doubles_letters_with_lowercase_version_with_groupletters_for_int(x): | |
365 | assume(len(x) <= 10) | |
366 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
367 | t = int_splitter(s, False, False, '') | |
368 | t = [''.join([low(z) + z for z in y]) if type(y) not in (int, long) else y for y in t] | |
369 | assert _number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_group) == t | |
370 | ||
371 | ||
372 | def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale_example(): | |
373 | load_locale('en_US') | |
374 | strxfrm = get_strxfrm() | |
375 | assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nosafe_locale_nogroup) == [strxfrm('A'), 5, strxfrm('+'), 5, strxfrm('.'), 34, strxfrm('E-'), 1] | |
376 | locale.setlocale(locale.LC_NUMERIC, str('')) | |
377 | ||
378 | ||
379 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
380 | @given([float, py23_str, int]) | |
381 | def test_number_extracter_extracts_numbers_and_strxfrms_strings_with_use_locale(x): | |
382 | assume(len(x) <= 10) | |
383 | load_locale('en_US') | |
384 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
385 | t = int_splitter(s, False, False, null_string) | |
386 | try: # Account for locale bug on Python 3.2 | |
387 | t = [y if i == 0 and y is null_string else locale_convert(y, (fast_int, isint), False) for i, y in enumerate(t)] | |
388 | assert _number_extracter(s, _int_nosign_re, *int_nosafe_locale_nogroup) == t | |
389 | except OverflowError: | |
390 | pass | |
391 | locale.setlocale(locale.LC_NUMERIC, str('')) | |
392 | ||
393 | ||
394 | def test_number_extracter_extracts_numbers_and_strxfrms_letter_doubled_strings_with_use_locale_and_groupletters_example(): | |
395 | load_locale('en_US') | |
396 | strxfrm = get_strxfrm() | |
397 | assert _number_extracter('A5+5.034E-1', _int_nosign_re, *int_nosafe_locale_group) == [strxfrm('aA'), 5, strxfrm('++'), 5, strxfrm('..'), 34, strxfrm('eE--'), 1] | |
398 | locale.setlocale(locale.LC_NUMERIC, str('')) | |
399 | ||
400 | ||
401 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
402 | @given([float, py23_str, int]) | |
403 | def test_number_extracter_extracts_numbers_and_strxfrms_letter_doubled_strings_with_use_locale_and_groupletters(x): | |
404 | assume(len(x) <= 10) | |
405 | load_locale('en_US') | |
406 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
407 | t = int_splitter(s, False, False, null_string) | |
408 | try: # Account for locale bug on Python 3.2 | |
409 | t = [y if i == 0 and y is null_string else locale_convert(y, (fast_int, isint), True) for i, y in enumerate(t)] | |
410 | assert _number_extracter(s, _int_nosign_re, *int_nosafe_locale_group) == t | |
411 | except OverflowError: | |
412 | pass | |
413 | locale.setlocale(locale.LC_NUMERIC, str('')) | |
414 | ||
415 | ||
416 | def test__natsort_key_with_nan_input_transforms_nan_to_negative_inf(): | |
417 | assert _natsort_key('nan', None, ns.FLOAT) == ('', float('-inf')) | |
418 | assert _natsort_key(float('nan'), None, 0) == ('', float('-inf')) | |
419 | ||
420 | ||
421 | def test__natsort_key_with_nan_input_and_NANLAST_transforms_nan_to_positive_inf(): | |
422 | assert _natsort_key('nan', None, ns.FLOAT | ns.NANLAST) == ('', float('+inf')) | |
423 | assert _natsort_key(float('nan'), None, ns.NANLAST) == ('', float('+inf')) | |
424 | assert ns.NL == ns.NANLAST | |
425 | ||
426 | ||
427 | # The remaining tests provide no examples, just hypothesis tests. | |
428 | # They only confirm that _natsort_key uses the above building blocks. | |
429 | ||
430 | ||
431 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
432 | @given([float, py23_str, int]) | |
433 | def test__natsort_key_with_float_and_signed_splits_input_into_string_and_signed_float_with_exponent(x): | |
434 | assume(len(x) <= 10) | |
435 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
436 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
437 | assert ns.F == ns.FLOAT | |
438 | assert ns.S == ns.SIGNED | |
439 | assert _natsort_key(s, None, ns.F | ns.S) == tuple(_number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_nogroup)) | |
440 | ||
441 | ||
442 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
443 | @given([float, py23_str, int]) | |
444 | def test__natsort_key_with_real_splits_input_into_string_and_signed_float_with_exponent(x): | |
445 | assume(len(x) <= 10) | |
446 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
447 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
448 | assert ns.R == ns.F | ns.S | |
449 | assert _natsort_key(s, None, ns.R) == tuple(_number_extracter(s, _float_sign_exp_re, *float_nosafe_nolocale_nogroup)) | |
450 | ||
451 | ||
452 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
453 | @given([float, py23_str, int]) | |
454 | def test__natsort_key_with_real_matches_signed_float(x): | |
455 | assume(len(x) <= 10) | |
456 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
457 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
458 | assert _natsort_key(s, None, ns.R) == _natsort_key(s, None, ns.F | ns.S) | |
459 | ||
460 | ||
461 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
462 | @given([float, py23_str, int]) | |
463 | def test__natsort_key_with_float_and_signed_and_noexp_splits_input_into_string_and_signed_float_without_exponent(x): | |
464 | assume(len(x) <= 10) | |
465 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
466 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
467 | assert ns.N == ns.NOEXP | |
468 | assert _natsort_key(s, None, ns.F | ns.S | ns.N) == tuple(_number_extracter(s, _float_sign_noexp_re, *float_nosafe_nolocale_nogroup)) | |
469 | ||
470 | ||
471 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
472 | @given([float, py23_str, int]) | |
473 | def test__natsort_key_with_float_and_unsigned_splits_input_into_string_and_unsigned_float(x): | |
474 | assume(len(x) <= 10) | |
475 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
476 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
477 | assert ns.U == ns.UNSIGNED | |
478 | assert _natsort_key(s, None, ns.F | ns.U) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_nolocale_nogroup)) | |
479 | # Default is unsigned search | |
480 | assert _natsort_key(s, None, ns.F) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_nolocale_nogroup)) | |
481 | ||
482 | ||
483 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
484 | @given([float, py23_str, int]) | |
485 | def test__natsort_key_with_float_and_noexp_splits_input_into_string_and_unsigned_float_without_exponent(x): | |
486 | assume(len(x) <= 10) | |
487 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
488 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
489 | assert _natsort_key(s, None, ns.F | ns.N) == tuple(_number_extracter(s, _float_nosign_noexp_re, *float_nosafe_nolocale_nogroup)) | |
490 | ||
491 | ||
492 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
493 | @given([float, py23_str, int]) | |
494 | def test__natsort_key_with_int_splits_input_into_string_and_unsigned_int(x): | |
495 | assume(len(x) <= 10) | |
496 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
497 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
498 | assert ns.I == ns.INT | |
499 | assert _natsort_key(s, None, ns.INT) == tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
500 | # Default is int search | |
501 | assert _natsort_key(s, None, ns.NOEXP) == tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
502 | # NOEXP is ignored for integers | |
503 | assert _natsort_key(s, None, ns.I | ns.NOEXP) == tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
504 | ||
505 | ||
506 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
507 | @given([float, py23_str, int]) | |
508 | def test__natsort_key_with_int_splits_and_signed_input_into_string_and_signed_int(x): | |
509 | assume(len(x) <= 10) | |
510 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
511 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
512 | assert _natsort_key(s, None, ns.INT | ns.SIGNED) == tuple(_number_extracter(s, _int_sign_re, *int_nosafe_nolocale_nogroup)) | |
513 | assert _natsort_key(s, None, ns.SIGNED) == tuple(_number_extracter(s, _int_sign_re, *int_nosafe_nolocale_nogroup)) | |
514 | ||
515 | ||
516 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
517 | @given([float, py23_str, int]) | |
518 | def test__natsort_key_with_version_or_digit_matches_usigned_int(x): | |
519 | assume(len(x) <= 10) | |
520 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
521 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
522 | assert _natsort_key(s, None, ns.VERSION) == _natsort_key(s, None, ns.INT | ns.UNSIGNED) | |
523 | assert _natsort_key(s, None, ns.DIGIT) == _natsort_key(s, None, ns.VERSION) | |
524 | ||
525 | ||
526 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
527 | @given([float, py23_str, int]) | |
528 | def test__natsort_key_with_key_applies_key_function_before_splitting(x): | |
529 | assume(len(x) <= 10) | |
530 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
531 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
532 | assert _natsort_key(s, lambda x: x.upper(), ns.I) == tuple(_number_extracter(s.upper(), _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
533 | ||
534 | ||
535 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
536 | @given([float, py23_str, int]) | |
537 | def test__natsort_key_with_tuple_input_returns_nested_tuples(x): | |
538 | # Iterables are parsed recursively so you can sort lists of lists. | |
539 | assume(len(x) <= 10) | |
540 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
541 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
542 | t = tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
543 | assert _natsort_key((s, s), None, ns.I) == (t, t) | |
544 | ||
545 | ||
546 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
547 | @given([float, py23_str, int]) | |
548 | def test__natsort_key_with_tuple_input_but_itemgetter_key_returns_split_second_element(x): | |
549 | # A key is applied before recursion, but not in the recursive calls. | |
550 | assume(len(x) <= 10) | |
551 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
552 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
553 | t = tuple(_number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
554 | assert _natsort_key((s, s), itemgetter(1), ns.I) == t | |
555 | ||
556 | ||
557 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
558 | @given(float) | |
559 | def test__natsort_key_with_numeric_input_returns_number_with_leading_empty_string(x): | |
560 | assume(not isnan(x)) | |
561 | if x.is_integer(): | |
562 | x = int(x) | |
563 | assert _natsort_key(x, None, ns.I) == ('', x) | |
564 | ||
565 | ||
566 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
567 | @given([float, py23_str, int]) | |
568 | def test__natsort_key_with_TYPESAFE_inserts_spaces_between_numbers(x): | |
569 | # Turn on TYPESAFE to put a '' between adjacent numbers | |
570 | assume(len(x) <= 10) | |
571 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
572 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
573 | assert _natsort_key(s, None, ns.TYPESAFE | ns.S) == tuple(_number_extracter(s, _int_sign_re, *int_safe_nolocale_nogroup)) | |
574 | ||
575 | ||
576 | def test__natsort_key_with_invalid_alg_input_raises_ValueError(): | |
577 | # Invalid arguments give the correct response | |
578 | with raises(ValueError) as err: | |
579 | _natsort_key('a', None, '1') | |
580 | assert str(err.value) == "_natsort_key: 'alg' argument must be from the enum 'ns', got 1" | |
581 | ||
582 | ||
583 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
584 | @given([float, py23_str, int]) | |
585 | def test__natsort_key_with_IGNORECASE_lowercases_text(x): | |
586 | assume(len(x) <= 10) | |
587 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
588 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
589 | try: | |
590 | assert _natsort_key(s, None, ns.IGNORECASE) == tuple(_number_extracter(s.casefold(), _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
591 | except AttributeError: | |
592 | assert _natsort_key(s, None, ns.IGNORECASE) == tuple(_number_extracter(s.lower(), _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
593 | ||
594 | ||
595 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
596 | @given([float, py23_str, int]) | |
597 | def test__natsort_key_with_LOWERCASEFIRST_inverts_text_case(x): | |
598 | assume(len(x) <= 10) | |
599 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
600 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
601 | assert _natsort_key(s, None, ns.LOWERCASEFIRST) == tuple(_number_extracter(s.swapcase(), _int_nosign_re, *int_nosafe_nolocale_nogroup)) | |
602 | ||
603 | ||
604 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
605 | @given([float, py23_str, int]) | |
606 | def test__natsort_key_with_GROUPLETTERS_doubles_text_with_lowercase_letter_first(x): | |
607 | assume(len(x) <= 10) | |
608 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
609 | s = ''.join(ichain([repr(y)] if type(y) in (float, long, int) else [low(y), y] for y in x)) | |
610 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
611 | t = _number_extracter(s, _int_nosign_re, *int_nosafe_nolocale_nogroup) | |
612 | assert _natsort_key(s, None, ns.GROUPLETTERS) == tuple(''.join(low(z) + z for z in y) if type(y) not in (float, long, int) else y for y in t) | |
613 | ||
614 | ||
615 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
616 | @given([float, py23_str, int]) | |
617 | def test__natsort_key_with_GROUPLETTERS_and_LOWERCASEFIRST_inverts_text_first_then_doubles_letters_with_lowercase_letter_first(x): | |
618 | assume(len(x) <= 10) | |
619 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
620 | s = ''.join(ichain([repr(y)] if type(y) in (float, long, int) else [low(y), y] for y in x)) | |
621 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
622 | t = _number_extracter(s.swapcase(), _int_nosign_re, *int_nosafe_nolocale_nogroup) | |
623 | assert _natsort_key(s, None, ns.G | ns.LF) == tuple(''.join(low(z) + z for z in y) if type(y) not in (float, long, int) else y for y in t) | |
624 | ||
625 | ||
626 | def test__natsort_key_with_bytes_input_only_applies_LOWERCASEFIRST_or_IGNORECASE_and_returns_in_tuple(): | |
627 | if sys.version[0] == '3': | |
628 | assert _natsort_key(b'Apple56', None, ns.I) == (b'Apple56',) | |
629 | assert _natsort_key(b'Apple56', None, ns.LF) == (b'aPPLE56',) | |
630 | assert _natsort_key(b'Apple56', None, ns.IC) == (b'apple56',) | |
631 | assert _natsort_key(b'Apple56', None, ns.G) == (b'Apple56',) | |
632 | else: | |
633 | assert True | |
634 | ||
635 | ||
636 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
637 | @given([float, py23_str, int]) | |
638 | def test__natsort_key_with_LOCALE_transforms_floats_according_to_the_current_locale_and_strxfrms_strings(x): | |
639 | # Locale aware sorting | |
640 | assume(len(x) <= 10) | |
641 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
642 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
643 | load_locale('en_US') | |
644 | if dumb_sort(): | |
645 | assert _natsort_key(s, None, ns.LOCALE | ns.F) == tuple(_number_extracter(s.swapcase(), _float_nosign_exp_re, *float_nosafe_locale_group)) | |
646 | else: | |
647 | assert _natsort_key(s, None, ns.LOCALE | ns.F) == tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_locale_nogroup)) | |
648 | locale.setlocale(locale.LC_NUMERIC, str('')) | |
649 | ||
650 | ||
651 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
652 | @given([float, py23_str, int]) | |
653 | def test__natsort_key_with_LOCALE_and_UNGROUPLETTERS_places_space_before_string_with_capital_first_letter(x): | |
654 | # Locale aware sorting | |
655 | assume(len(x) <= 10) | |
656 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
657 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
658 | load_locale('en_US') | |
659 | if dumb_sort(): | |
660 | t = tuple(_number_extracter(s.swapcase(), _float_nosign_exp_re, *float_nosafe_locale_group)) | |
661 | else: | |
662 | t = tuple(_number_extracter(s, _float_nosign_exp_re, *float_nosafe_locale_nogroup)) | |
663 | if not t: | |
664 | r = (t, t) | |
665 | elif t[0] is null_string: | |
666 | r = ((b'' if use_pyicu else '',), t) | |
667 | else: | |
668 | r = ((s[0],), t) | |
669 | assert _natsort_key(s, None, ns.LOCALE | ns.UNGROUPLETTERS | ns.F) == r | |
670 | # The below are all aliases for UNGROUPLETTERS | |
671 | assert ns.UNGROUPLETTERS == ns.UG | |
672 | assert ns.UNGROUPLETTERS == ns.CAPITALFIRST | |
673 | assert ns.UNGROUPLETTERS == ns.C | |
674 | locale.setlocale(locale.LC_NUMERIC, str('')) | |
675 | ||
676 | ||
677 | @pytest.mark.skipif(not use_hypothesis, reason='requires python2.7 or greater') | |
678 | @given([float, py23_str, int]) | |
679 | def test__natsort_key_with_UNGROUPLETTERS_does_nothing_without_LOCALE(x): | |
680 | assume(len(x) <= 10) | |
681 | assume(not any(type(y) == float and isnan(y) for y in x)) | |
682 | s = ''.join(repr(y) if type(y) in (float, long, int) else y for y in x) | |
683 | assert _natsort_key(s, None, ns.UG | ns.I) == _natsort_key(s, None, ns.I) | |
684 | ||
685 | ||
686 | # It is difficult to generate code that will create random filesystem paths, | |
687 | # so "example" based tests are given for the PATH option. | |
688 | ||
689 | ||
690 | def test__natsort_key_with_absolute_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_root_and_split_extensions(): | |
691 | # Turn on PATH to split a file path into components | |
692 | assert _natsort_key('/p/Folder (10)/file34.5nm (2).tar.gz', None, ns.PATH | ns.F) == (('/',), ('p', ), ('Folder (', 10.0, ')',), ('file', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) | |
693 | ||
694 | ||
695 | def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_with_leading_relative_parent_and_split_extensions(): | |
696 | assert _natsort_key('../Folder (10)/file (2).tar.gz', None, ns.PATH | ns.F) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) | |
697 | ||
698 | ||
699 | def test__natsort_key_with_relative_path_intput_and_PATH_returns_nested_tuple_where_each_element_is_path_component_and_split_extensions(): | |
700 | assert _natsort_key('Folder (10)/file.f34.5nm (2).tar.gz', None, ns.PATH | ns.F) == (('Folder (', 10.0, ')',), ('file.f', 34.5, 'nm (', 2.0, ')'), ('.tar',), ('.gz',)) | |
701 | ||
702 | ||
703 | def test__natsort_key_with_pathlib_intput_and_PATH_returns_nested_tuples(): | |
704 | # Converts pathlib PurePath (and subclass) objects to string before sorting | |
705 | assert _natsort_key(pathlib.Path('../Folder (10)/file (2).tar.gz'), None, ns.PATH | ns.F) == (('..', ), ('Folder (', 10.0, ')',), ('file (', 2.0, ')'), ('.tar',), ('.gz',)) | |
706 | ||
707 | ||
708 | def test__natsort_key_with_numeric_input_and_PATH_returns_number_in_nested_tuple(): | |
709 | # It gracefully handles as_path for numeric input by putting an extra tuple around it | |
710 | # so it will sort against the other as_path results. | |
711 | assert _natsort_key(10, None, ns.PATH) == (('', 10),) | |
712 | ||
713 | ||
714 | def test__natsort_key_with_tuple_of_paths_and_PATH_returns_triply_nested_tuple(): | |
715 | # PATH also handles recursion well. | |
716 | assert _natsort_key(('/Folder', '/Folder (1)'), None, ns.PATH) == ((('/',), ('Folder',)), (('/',), ('Folder (', 1, ')'))) | |
228 | y = tuple(pathlib.Path(z).parts) | |
229 | assert tuple(_path_splitter(z)) == y[:-1] + (pathlib.Path(z).stem, pathlib.Path(z).suffix) | |
230 | ||
231 | ||
232 | @given(integers()) | |
233 | def test_py23_cmp(x): | |
234 | assert py23_cmp(x, x) == 0 | |
235 | assert py23_cmp(x, x + 1) < 0 | |
236 | assert py23_cmp(x, x - 1) > 0 |
4 | 4 | |
5 | 5 | [tox] |
6 | 6 | envlist = |
7 | py26, py27, py32, py33, py34, pypy | |
7 | py27, py34, py35, py36, py37, pypy | |
8 | # Other valid environments are: | |
9 | # docs | |
10 | # release | |
11 | ||
12 | # Don't error out if a user hasn't installed all python versions. | |
13 | skip_missing_interpreters = | |
14 | true | |
8 | 15 | |
9 | 16 | [testenv] |
10 | commands = {envpython} setup.py test | |
11 | deps = pytest | |
17 | passenv = | |
18 | WITH_EXTRAS | |
19 | deps = | |
20 | pipenv | |
21 | extras = | |
22 | {env:WITH_EXTRAS:} | |
23 | commands = | |
24 | pipenv install --dev --skip-lock | |
25 | # Only run How It Works doctest on Python 3.6. | |
26 | py36: {envpython} -m doctest -o IGNORE_EXCEPTION_DETAIL docs/source/howitworks.rst | |
27 | # Other doctests are run for all pythons. | |
28 | pytest README.rst docs/source/intro.rst docs/source/examples.rst | |
29 | pytest --doctest-modules {envsitepackagesdir}/natsort | |
30 | # Full test suite. Allow the user to pass extra command-line arguments to pytest. | |
31 | pytest --flakes --pep8 --tb=short --cov {envsitepackagesdir}/natsort --cov-report term-missing {posargs:} | |
32 | ||
33 | # Build documentation. | |
34 | [testenv:docs] | |
35 | deps = | |
36 | sphinx | |
37 | sphinx_rtd_theme | |
38 | commands = | |
39 | {envpython} setup.py build_sphinx | |
40 | ||
41 | [testenv:release] | |
42 | deps = | |
43 | twine | |
44 | check-manifest | |
45 | commands = | |
46 | check-manifest | |
47 | {envpython} setup.py sdist bdist_wheel | |
48 | twine upload dist/* |