Update upstream source from tag 'upstream/0.11.0'
Update to upstream version '0.11.0'
with Debian dir 811a86892cc89d887b82a63bdf1a3dc151669a3b
Bas Couwenberg
1 year, 10 months ago
9 | 9 | geopandas/io/tests/* |
10 | 10 | geopandas/tools/tests/* |
11 | 11 | geopandas/_version.py |
12 | geopandas/datasets/naturalearth_creation.py |
10 | 10 | |
11 | 11 | - [ ] I have confirmed this bug exists on the latest version of geopandas. |
12 | 12 | |
13 | - [ ] (optional) I have confirmed this bug exists on the master branch of geopandas. | |
13 | - [ ] (optional) I have confirmed this bug exists on the main branch of geopandas. | |
14 | 14 | |
15 | 15 | --- |
16 | 16 |
18 | 18 | with: |
19 | 19 | python-version: "3.x" |
20 | 20 | |
21 | - name: Build a binary wheel and a source tarball | |
21 | - name: Build source and wheel distributions | |
22 | 22 | run: | |
23 | python -m pip install --upgrade pip | |
24 | pip install setuptools wheel | |
25 | python setup.py sdist bdist_wheel | |
23 | python -m pip install --upgrade build twine | |
24 | python -m build | |
25 | twine check --strict dist/* | |
26 | 26 | |
27 | 27 | - name: Publish distribution to PyPI |
28 | 28 | uses: pypa/gh-action-pypi-publish@master |
1 | 1 | |
2 | 2 | on: |
3 | 3 | push: |
4 | branches: [master] | |
4 | branches: [main, 0.**] | |
5 | 5 | pull_request: |
6 | branches: [master] | |
6 | branches: [main, 0.**] | |
7 | 7 | schedule: |
8 | 8 | - cron: "0 0 * * *" |
9 | 9 | |
10 | concurrency: | |
10 | concurrency: | |
11 | 11 | group: ${{ github.workflow }}-${{ github.ref }} |
12 | 12 | cancel-in-progress: true |
13 | 13 | |
16 | 16 | runs-on: ubuntu-latest |
17 | 17 | |
18 | 18 | steps: |
19 | - uses: actions/checkout@v2 | |
20 | - uses: actions/setup-python@v2 | |
21 | - uses: pre-commit/action@v2.0.0 | |
19 | - uses: actions/checkout@v3 | |
20 | - uses: actions/setup-python@v3 | |
21 | - uses: pre-commit/action@v2.0.3 | |
22 | 22 | |
23 | 23 | Test: |
24 | 24 | needs: Linting |
34 | 34 | postgis: [false] |
35 | 35 | dev: [false] |
36 | 36 | env: |
37 | - ci/envs/37-minimal.yaml | |
38 | - ci/envs/38-no-optional-deps.yaml | |
39 | - ci/envs/37-pd10.yaml | |
40 | - ci/envs/37-latest-defaults.yaml | |
41 | - ci/envs/37-latest-conda-forge.yaml | |
37 | - ci/envs/38-minimal.yaml | |
38 | - ci/envs/39-no-optional-deps.yaml | |
39 | - ci/envs/38-pd11-defaults.yaml | |
40 | - ci/envs/38-latest-defaults.yaml | |
42 | 41 | - ci/envs/38-latest-conda-forge.yaml |
42 | - ci/envs/39-pd12-conda-forge.yaml | |
43 | 43 | - ci/envs/39-latest-conda-forge.yaml |
44 | - ci/envs/310-latest-conda-forge.yaml | |
44 | 45 | include: |
45 | - env: ci/envs/37-latest-conda-forge.yaml | |
46 | os: macos-latest | |
47 | postgis: false | |
48 | dev: false | |
49 | 46 | - env: ci/envs/38-latest-conda-forge.yaml |
50 | 47 | os: macos-latest |
51 | 48 | postgis: false |
52 | 49 | dev: false |
53 | - env: ci/envs/37-latest-conda-forge.yaml | |
54 | os: windows-latest | |
50 | - env: ci/envs/39-latest-conda-forge.yaml | |
51 | os: macos-latest | |
55 | 52 | postgis: false |
56 | 53 | dev: false |
57 | 54 | - env: ci/envs/38-latest-conda-forge.yaml |
58 | 55 | os: windows-latest |
59 | 56 | postgis: false |
60 | 57 | dev: false |
61 | - env: ci/envs/38-dev.yaml | |
58 | - env: ci/envs/39-latest-conda-forge.yaml | |
59 | os: windows-latest | |
60 | postgis: false | |
61 | dev: false | |
62 | - env: ci/envs/310-dev.yaml | |
62 | 63 | os: ubuntu-latest |
63 | 64 | dev: true |
64 | 65 | |
65 | 66 | steps: |
66 | - uses: actions/checkout@v2 | |
67 | - uses: actions/checkout@v3 | |
67 | 68 | |
68 | 69 | - name: Setup Conda |
69 | 70 | uses: conda-incubator/setup-miniconda@v2 |
70 | 71 | with: |
71 | 72 | environment-file: ${{ matrix.env }} |
73 | miniforge-version: latest | |
74 | miniforge-variant: Mambaforge | |
75 | use-mamba: true | |
72 | 76 | |
73 | 77 | - name: Check and Log Environment |
74 | 78 | run: | |
101 | 105 | pytest -v -r s -n auto --color=yes --cov=geopandas --cov-append --cov-report term-missing --cov-report xml geopandas/ |
102 | 106 | |
103 | 107 | - name: Test with PostGIS |
104 | if: contains(matrix.env, '38-latest-conda-forge.yaml') && contains(matrix.os, 'ubuntu') | |
108 | if: contains(matrix.env, '39-pd12-conda-forge.yaml') && contains(matrix.os, 'ubuntu') | |
105 | 109 | env: |
106 | 110 | PGUSER: postgres |
107 | 111 | PGPASSWORD: postgres |
113 | 117 | pytest -v -r s --color=yes --cov=geopandas --cov-append --cov-report term-missing --cov-report xml geopandas/io/tests/test_sql.py | tee /dev/stderr | if grep SKIPPED >/dev/null;then echo "TESTS SKIPPED, FAILING" && exit 1;fi |
114 | 118 | |
115 | 119 | - name: Test docstrings |
116 | if: contains(matrix.env, '38-latest-conda-forge.yaml') && contains(matrix.os, 'ubuntu') | |
120 | if: contains(matrix.env, '39-pd12-conda-forge.yaml') && contains(matrix.os, 'ubuntu') | |
117 | 121 | env: |
118 | 122 | USE_PYGEOS: 1 |
119 | 123 | run: | |
120 | 124 | pytest -v --color=yes --doctest-only geopandas --ignore=geopandas/datasets |
121 | 125 | |
122 | - uses: codecov/codecov-action@v1 | |
126 | - uses: codecov/codecov-action@v2 |
65 | 65 | |
66 | 66 | geopandas.egg-info |
67 | 67 | geopandas/version.py |
68 | geopandas/datasets/ne_110m_admin_0_countries.zip | |
68 | 69 | |
69 | 70 | .asv |
70 | 71 | doc/source/getting_started/my_file.geojson |
0 | 0 | files: 'geopandas\/' |
1 | 1 | repos: |
2 | - repo: https://github.com/python/black | |
3 | rev: 20.8b1 | |
4 | hooks: | |
5 | - id: black | |
6 | language_version: python3 | |
7 | - repo: https://gitlab.com/pycqa/flake8 | |
8 | rev: 3.8.3 | |
9 | hooks: | |
10 | - id: flake8 | |
11 | language: python_venv | |
2 | - repo: https://github.com/psf/black | |
3 | rev: 22.3.0 | |
4 | hooks: | |
5 | - id: black | |
6 | language_version: python3 | |
7 | - repo: https://gitlab.com/pycqa/flake8 | |
8 | rev: 3.8.3 | |
9 | hooks: | |
10 | - id: flake8 | |
11 | language: python_venv |
0 | 0 | Changelog |
1 | 1 | ========= |
2 | 2 | |
3 | Version 0.11 (June 20, 2022) | |
4 | ---------------------------- | |
5 | ||
6 | Highlights of this release: | |
7 | ||
8 | - The ``geopandas.read_file()`` and `GeoDataFrame.to_file()` methods to read | |
9 | and write GIS file formats can now optionally use the | |
10 | [pyogrio](https://github.com/geopandas/pyogrio/) package under the hood | |
11 | through the ``engine="pyogrio"`` keyword. The pyogrio package implements | |
12 | vectorized IO for GDAL/OGR vector data sources, and is faster compared to | |
13 | the ``fiona``-based engine (#2225). | |
14 | - GeoParquet support updated to implement | |
15 | [v0.4.0](https://github.com/opengeospatial/geoparquet/releases/tag/v0.4.0) of the | |
16 | OpenGeospatial/GeoParquet specification (#2441). Backwards compatibility with v0.1.0 of | |
17 | the metadata spec (implemented in the previous releases of GeoPandas) is guaranteed, | |
18 | and reading and writing Parquet and Feather files will no longer produce a ``UserWarning`` | |
19 | (#2327). | |
20 | ||
21 | New features and improvements: | |
22 | ||
23 | - Improved handling of GeoDataFrame when the active geometry column is | |
24 | lost from the GeoDataFrame. Previously, square bracket indexing ``gdf[[...]]`` returned | |
25 | a GeoDataFrame when the active geometry column was retained and a DataFrame was | |
26 | returned otherwise. Other pandas indexing methods (``loc``, ``iloc``, etc) did not follow | |
27 | the same rules. The new behaviour for all indexing/reshaping operations is now as | |
28 | follows (#2329, #2060): | |
29 | - If operations produce a ``DataFrame`` containing the active geometry column, a | |
30 | GeoDataFrame is returned | |
31 | - If operations produce a ``DataFrame`` containing ``GeometryDtype`` columns, but not the | |
32 | active geometry column, a ``GeoDataFrame`` is returned, where the active geometry | |
33 | column is set to ``None`` (set the new geometry column with ``set_geometry()``) | |
34 | - If operations produce a ``DataFrame`` containing no ``GeometryDtype`` columns, a | |
35 | ``DataFrame`` is returned (this can be upcast again by calling ``set_geometry()`` or the | |
36 | ``GeoDataFrame`` constructor) | |
37 | - If operations produce a ``Series`` of ``GeometryDtype``, a ``GeoSeries`` is returned, | |
38 | otherwise ``Series`` is returned. | |
39 | - Error messages for having an invalid geometry column | |
40 | have been improved, indicating the name of the last valid active geometry column set | |
41 | and whether other geometry columns can be promoted to the active geometry column | |
42 | (#2329). | |
43 | ||
44 | - Datetime fields are now read and written correctly for GIS formats which support them | |
45 | (e.g. GPKG, GeoJSON) with fiona 1.8.14 or higher. Previously, datetimes were read as | |
46 | strings (#2202). | |
47 | - ``folium.Map`` keyword arguments can now be specified as the ``map_kwds`` argument to | |
48 | ``GeoDataFrame.explore()`` method (#2315). | |
49 | - Add a new parameter ``style_function`` to ``GeoDataFrame.explore()`` to enable plot styling | |
50 | based on GeoJSON properties (#2377). | |
51 | - It is now possible to write an empty ``GeoDataFrame`` to a file for supported formats | |
52 | (#2240). Attempting to do so will now emit a ``UserWarning`` instead of a ``ValueError``. | |
53 | - Fast rectangle clipping has been exposed as ``GeoSeries/GeoDataFrame.clip_by_rect()`` | |
54 | (#1928). | |
55 | - The ``mask`` parameter of ``GeoSeries/GeoDataFrame.clip()`` now accepts a rectangular mask | |
56 | as a list-like to perform fast rectangle clipping using the new | |
57 | ``GeoSeries/GeoDataFrame.clip_by_rect()`` (#2414). | |
58 | - Bundled demo dataset ``naturalearth_lowres`` has been updated to version 5.0.1 of the | |
59 | source, with field ``ISO_A3`` manually corrected for some cases (#2418). | |
60 | ||
61 | Deprecations and compatibility notes: | |
62 | ||
63 | - The active development branch of geopandas on GitHub has been renamed from master to | |
64 | main (#2277). | |
65 | - Deprecated methods ``GeometryArray.equals_exact()`` and ``GeometryArray.almost_equals()`` | |
66 | have been removed. They should | |
67 | be replaced with ``GeometryArray.geom_equals_exact()`` and | |
68 | ``GeometryArray.geom_almost_equals()`` respectively (#2267). | |
69 | - Deprecated CRS functions ``explicit_crs_from_epsg()``, ``epsg_from_crs()`` and | |
70 | ``get_epsg_file_contents()`` were removed (#2340). | |
71 | - Warning about the behaviour change to ``GeoSeries.isna()`` with empty | |
72 | geometries present has been removed (#2349). | |
73 | - Specifying a CRS in the ``GeoDataFrame/GeoSeries`` constructor which contradicted the | |
74 | underlying ``GeometryArray`` now raises a ``ValueError`` (#2100). | |
75 | - Specifying a CRS in the ``GeoDataFrame`` constructor when no geometry column is provided | |
76 | and calling ``GeoDataFrame. set_crs`` on a ``GeoDataFrame`` without an active geometry | |
77 | column now raise a ``ValueError`` (#2100) | |
78 | - Passing non-geometry data to the``GeoSeries`` constructor is now fully deprecated and | |
79 | will raise a ``TypeError`` (#2314). Previously, a ``pandas.Series`` was returned for | |
80 | non-geometry data. | |
81 | - Deprecated ``GeoSeries/GeoDataFrame`` set operations ``__xor__()``, | |
82 | ``__or__()``, ``__and__()`` and ``__sub__()``, ``geopandas.io.file.read_file``/``to_file`` and | |
83 | ``geopandas.io.sql.read_postgis`` now emit ``FutureWarning`` instead of | |
84 | ``DeprecationWarning`` and will be completely removed in a future release. | |
85 | - Accessing the ``crs`` of a ``GeoDataFrame`` without active geometry column is deprecated and will be removed in GeoPandas 0.12 (#2373). | |
86 | ||
87 | Bug fixes: | |
88 | ||
89 | - ``GeoSeries.to_frame`` now creates a ``GeoDataFrame`` with the geometry column name set | |
90 | correctly (#2296) | |
91 | - Fix pickle files created with pygeos installed can not being readable when pygeos is | |
92 | not installed (#2237). | |
93 | - Fixed ``UnboundLocalError`` in ``GeoDataFrame.plot()`` using ``legend=True`` and | |
94 | ``missing_kwds`` (#2281). | |
95 | - Fix ``explode()`` incorrectly relating index to columns, including where the input index | |
96 | is not unique (#2292) | |
97 | - Fix ``GeoSeries.[xyz]`` raising an ``IndexError`` when the underlying GeoSeries contains | |
98 | empty points (#2335). Rows corresponding to empty points now contain ``np.nan``. | |
99 | - Fix ``GeoDataFrame.iloc`` raising a ``TypeError`` when indexing a ``GeoDataFrame`` with only | |
100 | a single column of ``GeometryDtype`` (#1970). | |
101 | - Fix ``GeoDataFrame.iterfeatures()`` not returning features with the same field order as | |
102 | ``GeoDataFrame.columns`` (#2396). | |
103 | - Fix ``GeoDataFrame.from_features()`` to support reading GeoJSON with null properties | |
104 | (#2243). | |
105 | - Fix ``GeoDataFrame.to_parquet()`` not intercepting ``engine`` keyword argument, breaking | |
106 | consistency with pandas (#2227) | |
107 | - Fix ``GeoDataFrame.explore()`` producing an error when ``column`` is of boolean dtype | |
108 | (#2403). | |
109 | - Fix an issue where ``GeoDataFrame.to_postgis()`` output the wrong SRID for ESRI | |
110 | authority CRS (#2414). | |
111 | - Fix ``GeoDataFrame.from_dict/from_features`` classmethods using ``GeoDataFrame`` rather | |
112 | than ``cls`` as the constructor. | |
113 | - Fix ``GeoDataFrame.plot()`` producing incorrect colors with mixed geometry types when | |
114 | ``colors`` keyword is provided. (#2420) | |
115 | ||
116 | Notes on (optional) dependencies: | |
117 | ||
118 | - GeoPandas 0.11 drops support for Python 3.7 and pandas 0.25 (the minimum supported | |
119 | pandas version is now 1.0.5). Further, the minimum required versions for the listed | |
120 | dependencies have now changed to shapely 1.7, fiona 1.8.13.post1, pyproj 2.6.1.post1, | |
121 | matplotlib 3.2, mapclassify 2.4.0 (#2358, #2391) | |
122 | ||
123 | ||
3 | 124 | Version 0.10.2 (October 16, 2021) |
4 | 125 | --------------------------------- |
5 | 126 | |
6 | 127 | Small bug-fix release: |
7 | 128 | |
8 | - Fix regression in `overlay()` in case no geometries are intersecting (but | |
129 | - Fix regression in ``overlay()`` in case no geometries are intersecting (but | |
9 | 130 | have overlapping total bounds) (#2172). |
10 | - Fix regression in `overlay()` with `keep_geom_type=True` in case the | |
131 | - Fix regression in ``overlay()`` with ``keep_geom_type=True`` in case the | |
11 | 132 | overlay of two geometries in a GeometryCollection with other geometry types |
12 | 133 | (#2177). |
13 | - Fix `overlay()` to honor the `keep_geom_type` keyword for the | |
14 | `op="differnce"` case (#2164). | |
15 | - Fix regression in `plot()` with a mapclassify `scheme` in case the | |
134 | - Fix ``overlay()`` to honor the ``keep_geom_type`` keyword for the | |
135 | ``op="differnce"`` case (#2164). | |
136 | - Fix regression in ``plot()`` with a mapclassify ``scheme`` in case the | |
16 | 137 | formatted legend labels have duplicates (#2166). |
17 | - Fix a bug in the `explore()` method ignoring the `vmin` and `vmax` keywords | |
138 | - Fix a bug in the ``explore()`` method ignoring the ``vmin`` and ``vmax`` keywords | |
18 | 139 | in case they are set to 0 (#2175). |
19 | - Fix `unary_union` to correctly handle a GeoSeries with missing values (#2181). | |
20 | - Avoid internal deprecation warning in `clip()` (#2179). | |
140 | - Fix ``unary_union`` to correctly handle a GeoSeries with missing values (#2181). | |
141 | - Avoid internal deprecation warning in ``clip()`` (#2179). | |
21 | 142 | |
22 | 143 | |
23 | 144 | Version 0.10.1 (October 8, 2021) |
25 | 146 | |
26 | 147 | Small bug-fix release: |
27 | 148 | |
28 | - Fix regression in `overlay()` with non-overlapping geometries and a | |
29 | non-default `how` (i.e. not "intersection") (#2157). | |
149 | - Fix regression in ``overlay()`` with non-overlapping geometries and a | |
150 | non-default ``how`` (i.e. not "intersection") (#2157). | |
30 | 151 | |
31 | 152 | |
32 | 153 | Version 0.10.0 (October 3, 2021) |
34 | 155 | |
35 | 156 | Highlights of this release: |
36 | 157 | |
37 | - A new `sjoin_nearest()` method to join based on proximity, with the | |
38 | ability to set a maximum search radius (#1865). In addition, the `sindex` | |
158 | - A new ``sjoin_nearest()`` method to join based on proximity, with the | |
159 | ability to set a maximum search radius (#1865). In addition, the ``sindex`` | |
39 | 160 | attribute gained a new method for a "nearest" spatial index query (#1865, |
40 | 161 | #2053). |
41 | - A new `explore()` method on GeoDataFrame and GeoSeries with native support | |
162 | - A new ``explore()`` method on GeoDataFrame and GeoSeries with native support | |
42 | 163 | for interactive visualization based on folium / leaflet.js (#1953) |
43 | - The `geopandas.sjoin()`/`overlay()`/`clip()` functions are now also | |
164 | - The ``geopandas.sjoin()``/``overlay()``/``clip()`` functions are now also | |
44 | 165 | available as methods on the GeoDataFrame (#2141, #1984, #2150). |
45 | 166 | |
46 | 167 | New features and improvements: |
47 | 168 | |
48 | - Add support for pandas' `value_counts()` method for geometry dtype (#2047). | |
49 | - The `explode()` method has a new `ignore_index` keyword (consistent with | |
169 | - Add support for pandas' ``value_counts()`` method for geometry dtype (#2047). | |
170 | - The ``explode()`` method has a new ``ignore_index`` keyword (consistent with | |
50 | 171 | pandas' explode method) to reset the index in the result, and a new |
51 | `index_parts` keywords to control whether a cumulative count indexing the | |
172 | ``index_parts`` keywords to control whether a cumulative count indexing the | |
52 | 173 | parts of the exploded multi-geometries should be added (#1871). |
53 | - `points_from_xy()` is now available as a GeoSeries method `from_xy` (#1936). | |
54 | - The `to_file()` method will now attempt to detect the driver (if not | |
174 | - ``points_from_xy()`` is now available as a GeoSeries method ``from_xy`` (#1936). | |
175 | - The ``to_file()`` method will now attempt to detect the driver (if not | |
55 | 176 | specified) based on the extension of the provided filename, instead of |
56 | 177 | defaulting to ESRI Shapefile (#1609). |
57 | - Support for the `storage_options` keyword in `read_parquet()` for | |
178 | - Support for the ``storage_options`` keyword in ``read_parquet()`` for | |
58 | 179 | specifying filesystem-specific options (e.g. for S3) based on fsspec (#2107). |
59 | - The read/write functions now support `~` (user home directory) expansion (#1876). | |
60 | - Support the `convert_dtypes()` method from pandas to preserve the | |
180 | - The read/write functions now support ``~`` (user home directory) expansion (#1876). | |
181 | - Support the ``convert_dtypes()`` method from pandas to preserve the | |
61 | 182 | GeoDataFrame class (#2115). |
62 | - Support WKB values in the hex format in `GeoSeries.from_wkb()` (#2106). | |
63 | - Update the `estimate_utm_crs()` method to handle crossing the antimeridian | |
183 | - Support WKB values in the hex format in ``GeoSeries.from_wkb()`` (#2106). | |
184 | - Update the ``estimate_utm_crs()`` method to handle crossing the antimeridian | |
64 | 185 | with pyproj 3.1+ (#2049). |
65 | 186 | - Improved heuristic to decide how many decimals to show in the repr based on |
66 | 187 | whether the CRS is projected or geographic (#1895). |
67 | - Switched the default for `geocode()` from GeoCode.Farm to the Photon | |
188 | - Switched the default for ``geocode()`` from GeoCode.Farm to the Photon | |
68 | 189 | geocoding API (https://photon.komoot.io) (#2007). |
69 | 190 | |
70 | 191 | Deprecations and compatibility notes: |
71 | 192 | |
72 | - The `op=` keyword of `sjoin()` to indicate which spatial predicate to use | |
73 | for joining is being deprecated and renamed in favor of a new `predicate=` | |
193 | - The ``op=`` keyword of ``sjoin()`` to indicate which spatial predicate to use | |
194 | for joining is being deprecated and renamed in favor of a new ``predicate=`` | |
74 | 195 | keyword (#1626). |
75 | - The `cascaded_union` attribute is deprecated, use `unary_union` instead (#2074). | |
196 | - The ``cascaded_union`` attribute is deprecated, use ``unary_union`` instead (#2074). | |
76 | 197 | - Constructing a GeoDataFrame with a duplicated "geometry" column is now |
77 | disallowed. This can also raise an error in the `pd.concat(.., axis=1)` | |
198 | disallowed. This can also raise an error in the ``pd.concat(.., axis=1)`` | |
78 | 199 | function if this results in duplicated active geometry columns (#2046). |
79 | - The `explode()` method currently returns a GeoSeries/GeoDataFrame with a | |
200 | - The ``explode()`` method currently returns a GeoSeries/GeoDataFrame with a | |
80 | 201 | MultiIndex, with an additional level with indices of the parts of the |
81 | 202 | exploded multi-geometries. For consistency with pandas, this will change in |
82 | the future and the new `index_parts` keyword is added to control this. | |
83 | ||
84 | Bug fixes: | |
85 | ||
86 | - Fix in the `clip()` function to correctly clip MultiPoints instead of | |
203 | the future and the new ``index_parts`` keyword is added to control this. | |
204 | ||
205 | Bug fixes: | |
206 | ||
207 | - Fix in the ``clip()`` function to correctly clip MultiPoints instead of | |
87 | 208 | leaving them intact when partly outside of the clip bounds (#2148). |
88 | - Fix `GeoSeries.isna()` to correctly return a boolean Series in case of an | |
209 | - Fix ``GeoSeries.isna()`` to correctly return a boolean Series in case of an | |
89 | 210 | empty GeoSeries (#2073). |
90 | 211 | - Fix the GeoDataFrame constructor to preserve the geometry name when the |
91 | argument is already a GeoDataFrame object (i.e. `GeoDataFrame(gdf)`) (#2138). | |
212 | argument is already a GeoDataFrame object (i.e. ``GeoDataFrame(gdf)``) (#2138). | |
92 | 213 | - Fix loss of the values' CRS when setting those values as a column |
93 | (`GeoDataFrame.__setitem__`) (#1963) | |
94 | - Fix in `GeoDataFrame.apply()` to preserve the active geometry column name | |
214 | (``GeoDataFrame.__setitem__``) (#1963) | |
215 | - Fix in ``GeoDataFrame.apply()`` to preserve the active geometry column name | |
95 | 216 | (#1955). |
96 | - Fix in `sjoin()` to not ignore the suffixes in case of a right-join | |
97 | (`how="right`) (#2065). | |
98 | - Fix `GeoDataFrame.explode()` with a MultiIndex (#1945). | |
99 | - Fix the handling of missing values in `to/from_wkb` and `to_from_wkt` (#1891). | |
100 | - Fix `to_file()` and `to_json()` when DataFrame has duplicate columns to | |
217 | - Fix in ``sjoin()`` to not ignore the suffixes in case of a right-join | |
218 | (``how="right``) (#2065). | |
219 | - Fix ``GeoDataFrame.explode()`` with a MultiIndex (#1945). | |
220 | - Fix the handling of missing values in ``to/from_wkb`` and ``to_from_wkt`` (#1891). | |
221 | - Fix ``to_file()`` and ``to_json()`` when DataFrame has duplicate columns to | |
101 | 222 | raise an error (#1900). |
102 | 223 | - Fix bug in the colors shown with user-defined classification scheme (#2019). |
103 | - Fix handling of the `path_effects` keyword in `plot()` (#2127). | |
104 | - Fix `GeoDataFrame.explode()` to preserve `attrs` (#1935) | |
224 | - Fix handling of the ``path_effects`` keyword in ``plot()`` (#2127). | |
225 | - Fix ``GeoDataFrame.explode()`` to preserve ``attrs`` (#1935) | |
105 | 226 | |
106 | 227 | Notes on (optional) dependencies: |
107 | 228 | |
124 | 245 | |
125 | 246 | New features and improvements: |
126 | 247 | |
127 | - The `geopandas.read_file` function now accepts more general | |
128 | file-like objects (e.g. `fsspec` open file objects). It will now also | |
248 | - The ``geopandas.read_file`` function now accepts more general | |
249 | file-like objects (e.g. ``fsspec`` open file objects). It will now also | |
129 | 250 | automatically recognize zipped files (#1535). |
130 | - The `GeoDataFrame.plot()` method now provides access to the pandas plotting | |
131 | functionality for the non-geometry columns, either using the `kind` keyword | |
132 | or the accessor method (e.g. `gdf.plot(kind="bar")` or `gdf.plot.bar()`) | |
251 | - The ``GeoDataFrame.plot()`` method now provides access to the pandas plotting | |
252 | functionality for the non-geometry columns, either using the ``kind`` keyword | |
253 | or the accessor method (e.g. ``gdf.plot(kind="bar")`` or ``gdf.plot.bar()``) | |
133 | 254 | (#1465). |
134 | - New `from_wkt()`, `from_wkb()`, `to_wkt()`, `to_wkb()` methods for | |
255 | - New ``from_wkt()``, ``from_wkb()``, ``to_wkt()``, ``to_wkb()`` methods for | |
135 | 256 | GeoSeries to construct a GeoSeries from geometries in WKT or WKB |
136 | 257 | representation, or to convert a GeoSeries to a pandas Seriew with WKT or WKB |
137 | 258 | values (#1710). |
138 | - New `GeoSeries.z` attribute to access the z-coordinates of Point geometries | |
139 | (similar to the existing `.x` and `.y` attributes) (#1773). | |
140 | - The `to_crs()` method now handles missing values (#1618). | |
141 | - Support for pandas' new `.attrs` functionality (#1658). | |
142 | - The `dissolve()` method now allows dissolving by no column (`by=None`) to | |
259 | - New ``GeoSeries.z`` attribute to access the z-coordinates of Point geometries | |
260 | (similar to the existing ``.x`` and ``.y`` attributes) (#1773). | |
261 | - The ``to_crs()`` method now handles missing values (#1618). | |
262 | - Support for pandas' new ``.attrs`` functionality (#1658). | |
263 | - The ``dissolve()`` method now allows dissolving by no column (``by=None``) to | |
143 | 264 | create a union of all geometries (single-row GeoDataFrame) (#1568). |
144 | - New `estimate_utm_crs()` method on GeoSeries/GeoDataFrame to determine the | |
265 | - New ``estimate_utm_crs()`` method on GeoSeries/GeoDataFrame to determine the | |
145 | 266 | UTM CRS based on the bounds (#1646). |
146 | - `GeoDataFrame.from_dict()` now accepts `geometry` and `crs` keywords | |
267 | - ``GeoDataFrame.from_dict()`` now accepts ``geometry`` and ``crs`` keywords | |
147 | 268 | (#1619). |
148 | - `GeoDataFrame.to_postgis()` and `geopandas.read_postgis()` now supports | |
269 | - ``GeoDataFrame.to_postgis()`` and ``geopandas.read_postgis()`` now supports | |
149 | 270 | both sqlalchemy engine and connection objects (#1638). |
150 | - The `GeoDataFrame.explode()` method now allows exploding based on a | |
271 | - The ``GeoDataFrame.explode()`` method now allows exploding based on a | |
151 | 272 | non-geometry column, using the pandas implementation (#1720). |
152 | - Performance improvement in `GeoDataFrame/GeoSeries.explode()` when using | |
273 | - Performance improvement in ``GeoDataFrame/GeoSeries.explode()`` when using | |
153 | 274 | the PyGEOS backend (#1693). |
154 | - The binary operation and predicate methods (eg `intersection()`, | |
155 | `intersects()`) have a new `align` keyword which allows optionally not | |
156 | aligning on the index before performing the operation with `align=False` | |
275 | - The binary operation and predicate methods (eg ``intersection()``, | |
276 | ``intersects()``) have a new ``align`` keyword which allows optionally not | |
277 | aligning on the index before performing the operation with ``align=False`` | |
157 | 278 | (#1668). |
158 | - The `GeoDataFrame.dissolve()` method now supports all relevant keywords of | |
159 | `groupby()`, i.e. the `level`, `sort`, `observed` and `dropna` keywords | |
279 | - The ``GeoDataFrame.dissolve()`` method now supports all relevant keywords of | |
280 | ``groupby()``, i.e. the ``level``, ``sort``, ``observed`` and ``dropna`` keywords | |
160 | 281 | (#1845). |
161 | - The `geopandas.overlay()` function now accepts `make_valid=False` to skip | |
162 | the step to ensure the input geometries are valid using `buffer(0)` (#1802). | |
163 | - The `GeoDataFrame.to_json()` method gained a `drop_id` keyword to | |
282 | - The ``geopandas.overlay()`` function now accepts ``make_valid=False`` to skip | |
283 | the step to ensure the input geometries are valid using ``buffer(0)`` (#1802). | |
284 | - The ``GeoDataFrame.to_json()`` method gained a ``drop_id`` keyword to | |
164 | 285 | optionally not write the GeoDataFrame's index as the "id" field in the |
165 | 286 | resulting JSON (#1637). |
166 | - A new `aspect` keyword in the plotting methods to optionally allow retaining | |
287 | - A new ``aspect`` keyword in the plotting methods to optionally allow retaining | |
167 | 288 | the original aspect (#1512) |
168 | - A new `interval` keyword in the `legend_kwds` group of the `plot()` method | |
289 | - A new ``interval`` keyword in the ``legend_kwds`` group of the ``plot()`` method | |
169 | 290 | to control the appearance of the legend labels when using a classification |
170 | 291 | scheme (#1605). |
171 | - The spatial index of a GeoSeries (accessed with the `sindex` attribute) is | |
292 | - The spatial index of a GeoSeries (accessed with the ``sindex`` attribute) is | |
172 | 293 | now stored on the underlying array. This ensures that the spatial index is |
173 | 294 | preserved in more operations where possible, and that multiple geometry |
174 | 295 | columns of a GeoDataFrame can each have a spatial index (#1444). |
175 | - Addition of a `has_sindex` attribute on the GeoSeries/GeoDataFrame to check | |
296 | - Addition of a ``has_sindex`` attribute on the GeoSeries/GeoDataFrame to check | |
176 | 297 | if a spatial index has already been initialized (#1627). |
177 | - The `geopandas.testing.assert_geoseries_equal()` and `assert_geodataframe_equal()` | |
178 | testing utilities now have a `normalize` keyword (False by default) to | |
298 | - The ``geopandas.testing.assert_geoseries_equal()`` and ``assert_geodataframe_equal()`` | |
299 | testing utilities now have a ``normalize`` keyword (False by default) to | |
179 | 300 | normalize geometries before comparing for equality (#1826). Those functions |
180 | 301 | now also give a more informative error message when failing (#1808). |
181 | 302 | |
182 | 303 | Deprecations and compatibility notes: |
183 | 304 | |
184 | - The `is_ring` attribute currently returns True for Polygons. In the future, | |
305 | - The ``is_ring`` attribute currently returns True for Polygons. In the future, | |
185 | 306 | this will be False (#1631). In addition, start to check it for LineStrings |
186 | 307 | and LinearRings (instead of always returning False). |
187 | - The deprecated `objects` keyword in the `intersection()` method of the | |
188 | `GeoDataFrame/GeoSeries.sindex` spatial index object has been removed | |
308 | - The deprecated ``objects`` keyword in the ``intersection()`` method of the | |
309 | ``GeoDataFrame/GeoSeries.sindex`` spatial index object has been removed | |
189 | 310 | (#1444). |
190 | 311 | |
191 | 312 | Bug fixes: |
192 | 313 | |
193 | - Fix regression in the `plot()` method raising an error with empty | |
314 | - Fix regression in the ``plot()`` method raising an error with empty | |
194 | 315 | geometries (#1702, #1828). |
195 | - Fix `geopandas.overlay()` to preserve geometries of the correct type which | |
316 | - Fix ``geopandas.overlay()`` to preserve geometries of the correct type which | |
196 | 317 | are nested within a GeometryCollection as a result of the overlay |
197 | 318 | operation (#1582). In addition, a warning will now be raised if geometries |
198 | 319 | of different type are dropped from the result (#1554). |
199 | 320 | - Fix the repr of an empty GeoSeries to not show spurious warnings (#1673). |
200 | - Fix the `.crs` for empty GeoDataFrames (#1560). | |
201 | - Fix `geopandas.clip` to preserve the correct geometry column name (#1566). | |
202 | - Fix bug in `plot()` method when using `legend_kwds` with multiple subplots | |
321 | - Fix the ``.crs`` for empty GeoDataFrames (#1560). | |
322 | - Fix ``geopandas.clip`` to preserve the correct geometry column name (#1566). | |
323 | - Fix bug in ``plot()`` method when using ``legend_kwds`` with multiple subplots | |
203 | 324 | (#1583) |
204 | - Fix spurious warning with `missing_kwds` keyword of the `plot()` method | |
325 | - Fix spurious warning with ``missing_kwds`` keyword of the ``plot()`` method | |
205 | 326 | when there are no areas with missing data (#1600). |
206 | - Fix the `plot()` method to correctly align values passed to the `column` | |
327 | - Fix the ``plot()`` method to correctly align values passed to the ``column`` | |
207 | 328 | keyword as a pandas Series (#1670). |
208 | 329 | - Fix bug in plotting MultiPoints when passing values to determine the color |
209 | 330 | (#1694) |
210 | - The `rename_geometry()` method now raises a more informative error message | |
331 | - The ``rename_geometry()`` method now raises a more informative error message | |
211 | 332 | when a duplicate column name is used (#1602). |
212 | - Fix `explode()` method to preserve the CRS (#1655) | |
213 | - Fix the `GeoSeries.apply()` method to again accept the `convert_dtype` | |
333 | - Fix ``explode()`` method to preserve the CRS (#1655) | |
334 | - Fix the ``GeoSeries.apply()`` method to again accept the ``convert_dtype`` | |
214 | 335 | keyword to be consistent with pandas (#1636). |
215 | - Fix `GeoDataFrame.apply()` to preserve the CRS when possible (#1848). | |
216 | - Fix bug in containment test as `geom in geoseries` (#1753). | |
217 | - The `shift()` method of a GeoSeries/GeoDataFrame now preserves the CRS | |
336 | - Fix ``GeoDataFrame.apply()`` to preserve the CRS when possible (#1848). | |
337 | - Fix bug in containment test as ``geom in geoseries`` (#1753). | |
338 | - The ``shift()`` method of a GeoSeries/GeoDataFrame now preserves the CRS | |
218 | 339 | (#1744). |
219 | 340 | - The PostGIS IO functionality now quotes table names to ensure it works with |
220 | 341 | case-sensitive names (#1825). |
221 | - Fix the `GeoSeries` constructor without passing data but only an index (#1798). | |
342 | - Fix the ``GeoSeries`` constructor without passing data but only an index (#1798). | |
222 | 343 | |
223 | 344 | Notes on (optional) dependencies: |
224 | 345 | |
225 | 346 | - GeoPandas 0.9.0 dropped support for Python 3.5. Further, the minimum |
226 | 347 | required versions are pandas 0.24, numpy 1.15 and shapely 1.6 and fiona 1.8. |
227 | - The `descartes` package is no longer required for plotting polygons. This | |
348 | - The ``descartes`` package is no longer required for plotting polygons. This | |
228 | 349 | functionality is now included by default in GeoPandas itself, when |
229 | 350 | matplotlib is available (#1677). |
230 | - Fiona is now only imported when used in `read_file`/`to_file`. This means | |
351 | - Fiona is now only imported when used in ``read_file``/``to_file``. This means | |
231 | 352 | you can now force geopandas to install without fiona installed (although it |
232 | 353 | is still a default requirement) (#1775). |
233 | 354 | - Compatibility with the upcoming Shapely 1.8 (#1659, #1662, #1819). |
244 | 365 | |
245 | 366 | Small bug-fix release: |
246 | 367 | |
247 | - Fix a regression in the `plot()` method when visualizing with a | |
368 | - Fix a regression in the ``plot()`` method when visualizing with a | |
248 | 369 | JenksCaspallSampled or FisherJenksSampled scheme (#1486). |
249 | - Fix spurious warning in `GeoDataFrame.to_postgis` (#1497). | |
250 | - Fix the un-pickling with `pd.read_pickle` of files written with older | |
370 | - Fix spurious warning in ``GeoDataFrame.to_postgis`` (#1497). | |
371 | - Fix the un-pickling with ``pd.read_pickle`` of files written with older | |
251 | 372 | GeoPandas versions (#1511). |
252 | 373 | |
253 | 374 | |
257 | 378 | **Experimental**: optional use of PyGEOS to speed up spatial operations (#1155). |
258 | 379 | PyGEOS is a faster alternative for Shapely (being contributed back to a future |
259 | 380 | version of Shapely), and is used in element-wise spatial operations and for |
260 | spatial index in e.g. `sjoin` (#1343, #1401, #1421, #1427, #1428). See the | |
381 | spatial index in e.g. ``sjoin`` (#1343, #1401, #1421, #1427, #1428). See the | |
261 | 382 | [installation docs](https://geopandas.readthedocs.io/en/latest/install.html#using-the-optional-pygeos-dependency) |
262 | 383 | for more info and how to enable it. |
263 | 384 | |
265 | 386 | |
266 | 387 | - IO enhancements: |
267 | 388 | |
268 | - New `GeoDataFrame.to_postgis()` method to write to PostGIS database (#1248). | |
389 | - New ``GeoDataFrame.to_postgis()`` method to write to PostGIS database (#1248). | |
269 | 390 | - New Apache Parquet and Feather file format support (#1180, #1435) |
270 | - Allow appending to files with `GeoDataFrame.to_file` (#1229). | |
271 | - Add support for the `ignore_geometry` keyword in `read_file` to only read | |
391 | - Allow appending to files with ``GeoDataFrame.to_file`` (#1229). | |
392 | - Add support for the ``ignore_geometry`` keyword in ``read_file`` to only read | |
272 | 393 | the attribute data. If set to True, a pandas DataFrame without geometry is |
273 | 394 | returned (#1383). |
274 | - `geopandas.read_file` now supports reading from file-like objects (#1329). | |
275 | - `GeoDataFrame.to_file` now supports specifying the CRS to write to the file | |
395 | - ``geopandas.read_file`` now supports reading from file-like objects (#1329). | |
396 | - ``GeoDataFrame.to_file`` now supports specifying the CRS to write to the file | |
276 | 397 | (#802). By default it still uses the CRS of the GeoDataFrame. |
277 | - New `chunksize` keyword in `geopandas.read_postgis` to read a query in | |
398 | - New ``chunksize`` keyword in ``geopandas.read_postgis`` to read a query in | |
278 | 399 | chunks (#1123). |
279 | 400 | |
280 | 401 | - Improvements related to geometry columns and CRS: |
282 | 403 | - Any column of the GeoDataFrame that has a "geometry" dtype is now returned |
283 | 404 | as a GeoSeries. This means that when having multiple geometry columns, not |
284 | 405 | only the "active" geometry column is returned as a GeoSeries, but also |
285 | accessing another geometry column (`gdf["other_geom_column"]`) gives a | |
406 | accessing another geometry column (``gdf["other_geom_column"]``) gives a | |
286 | 407 | GeoSeries (#1336). |
287 | 408 | - Multiple geometry columns in a GeoDataFrame can now each have a different |
288 | CRS. The global `gdf.crs` attribute continues to returns the CRS of the | |
409 | CRS. The global ``gdf.crs`` attribute continues to return the CRS of the | |
289 | 410 | "active" geometry column. The CRS of other geometry columns can be accessed |
290 | from the column itself (eg `gdf["other_geom_column"].crs`) (#1339). | |
291 | - New `set_crs()` method on GeoDataFrame/GeoSeries to set the CRS of naive | |
411 | from the column itself (eg ``gdf["other_geom_column"].crs``) (#1339). | |
412 | - New ``set_crs()`` method on GeoDataFrame/GeoSeries to set the CRS of naive | |
292 | 413 | geometries (#747). |
293 | 414 | |
294 | 415 | - Improvements related to plotting: |
295 | 416 | |
296 | 417 | - The y-axis is now scaled depending on the center of the plot when using a |
297 | 418 | geographic CRS, instead of using an equal aspect ratio (#1290). |
298 | - When passing a column of categorical dtype to the `column=` keyword of the | |
299 | GeoDataFrame `plot()`, we now honor all categories and its order (#1483). | |
300 | In addition, a new `categories` keyword allows to specify all categories | |
419 | - When passing a column of categorical dtype to the ``column=`` keyword of the | |
420 | GeoDataFrame ``plot()``, we now honor all categories and their order (#1483). | |
421 | In addition, a new ``categories`` keyword allows to specify all categories | |
301 | 422 | and their order otherwise (#1173). |
302 | - For choropleths using a classification scheme (using `scheme=`), the | |
303 | `legend_kwds` accept two new keywords to control the formatting of the | |
304 | legend: `fmt` with a format string for the bin edges (#1253), and `labels` | |
423 | - For choropleths using a classification scheme (using ``scheme=``), the | |
424 | ``legend_kwds`` accept two new keywords to control the formatting of the | |
425 | legend: ``fmt`` with a format string for the bin edges (#1253), and ``labels`` | |
305 | 426 | to pass fully custom class labels (#1302). |
306 | 427 | |
307 | - New `covers()` and `covered_by()` methods on GeoSeries/GeoDataframe for the | |
428 | - New ``covers()`` and ``covered_by()`` methods on GeoSeries/GeoDataframe for the | |
308 | 429 | equivalent spatial predicates (#1460, #1462). |
309 | 430 | - GeoPandas now warns when using distance-based methods with data in a |
310 | 431 | geographic projection (#1378). |
313 | 434 | |
314 | 435 | - When constructing a GeoSeries or GeoDataFrame from data that already has a |
315 | 436 | CRS, a deprecation warning is raised when both CRS don't match, and in the |
316 | future an error will be raised in such a case. You can use the new `set_crs` | |
437 | future an error will be raised in such a case. You can use the new ``set_crs`` | |
317 | 438 | method to override an existing CRS. See |
318 | 439 | [the docs](https://geopandas.readthedocs.io/en/latest/projections.html#projection-for-multiple-geometry-columns). |
319 | - The helper functions in the `geopandas.plotting` module are deprecated for | |
440 | - The helper functions in the ``geopandas.plotting`` module are deprecated for | |
320 | 441 | public usage (#656). |
321 | - The `geopandas.io` functions are deprecated, use the top-level `read_file` and | |
322 | `to_file` instead (#1407). | |
323 | - The set operators (`&`, `|`, `^`, `-`) are deprecated, use the | |
324 | `intersection()`, `union()`, `symmetric_difference()`, `difference()` methods | |
442 | - The ``geopandas.io`` functions are deprecated, use the top-level ``read_file`` and | |
443 | ``to_file`` instead (#1407). | |
444 | - The set operators (``&``, ``|``, ``^``, ``-``) are deprecated, use the | |
445 | ``intersection()``, ``union()``, ``symmetric_difference()``, ``difference()`` methods | |
325 | 446 | instead (#1255). |
326 | - The `sindex` for empty dataframe will in the future return an empty spatial | |
327 | index instead of `None` (#1438). | |
328 | - The `objects` keyword in the `intersection` method of the spatial index | |
329 | returned by the `sindex` attribute is deprecated and will be removed in the | |
447 | - The ``sindex`` for empty dataframe will in the future return an empty spatial | |
448 | index instead of ``None`` (#1438). | |
449 | - The ``objects`` keyword in the ``intersection`` method of the spatial index | |
450 | returned by the ``sindex`` attribute is deprecated and will be removed in the | |
330 | 451 | future (#1440). |
331 | 452 | |
332 | 453 | Bug fixes: |
333 | 454 | |
334 | - Fix the `total_bounds()` method to ignore missing and empty geometries (#1312). | |
335 | - Fix `geopandas.clip` when masking with non-overlapping area resulting in an | |
455 | - Fix the ``total_bounds()`` method to ignore missing and empty geometries (#1312). | |
456 | - Fix ``geopandas.clip`` when masking with non-overlapping area resulting in an | |
336 | 457 | empty GeoDataFrame (#1309, #1365). |
337 | - Fix error in `geopandas.sjoin` when joining on an empty geometry column (#1318). | |
338 | - CRS related fixes: `pandas.concat` preserves CRS when concatenating GeoSeries | |
339 | objects (#1340), preserve the CRS in `geopandas.clip` (#1362) and in | |
340 | `GeoDataFrame.astype` (#1366). | |
341 | - Fix bug in `GeoDataFrame.explode()` when 'level_1' is one of the column names | |
458 | - Fix error in ``geopandas.sjoin`` when joining on an empty geometry column (#1318). | |
459 | - CRS related fixes: ``pandas.concat`` preserves CRS when concatenating GeoSeries | |
460 | objects (#1340), preserve the CRS in ``geopandas.clip`` (#1362) and in | |
461 | ``GeoDataFrame.astype`` (#1366). | |
462 | - Fix bug in ``GeoDataFrame.explode()`` when 'level_1' is one of the column names | |
342 | 463 | (#1445). |
343 | 464 | - Better error message when rtree is not installed (#1425). |
344 | - Fix bug in `GeoSeries.equals()` (#1451). | |
465 | - Fix bug in ``GeoSeries.equals()`` (#1451). | |
345 | 466 | - Fix plotting of multi-part geometries with additional style keywords (#1385). |
346 | 467 | |
347 | And we now have a [Code of Conduct](https://github.com/geopandas/geopandas/blob/master/CODE_OF_CONDUCT.md)! | |
468 | And we now have a [Code of Conduct](https://github.com/geopandas/geopandas/blob/main/CODE_OF_CONDUCT.md)! | |
348 | 469 | |
349 | 470 | GeoPandas 0.8.0 is the last release to support Python 3.5. The next release |
350 | 471 | will require Python 3.6, pandas 0.24, numpy 1.15 and shapely 1.6 or higher. |
356 | 477 | Support for Python 2.7 has been dropped. GeoPandas now works with Python >= 3.5. |
357 | 478 | |
358 | 479 | The important API change of this release is that GeoPandas now requires |
359 | PROJ > 6 and pyproj > 2.2, and that the `.crs` attribute of a GeoSeries and | |
480 | PROJ > 6 and pyproj > 2.2, and that the ``.crs`` attribute of a GeoSeries and | |
360 | 481 | GeoDataFrame no longer stores the CRS information as a proj4 string or dict, |
361 | 482 | but as a ``pyproj.CRS`` object (#1101). |
362 | 483 | |
367 | 488 | |
368 | 489 | Other API changes: |
369 | 490 | |
370 | - The `GeoDataFrame.to_file` method will now also write the GeoDataFrame index | |
491 | - The ``GeoDataFrame.to_file`` method will now also write the GeoDataFrame index | |
371 | 492 | to the file, if the index is named and/or non-integer. You can use the |
372 | `index=True/False` keyword to overwrite this default inference (#1059). | |
493 | ``index=True/False`` keyword to overwrite this default inference (#1059). | |
373 | 494 | |
374 | 495 | New features and improvements: |
375 | 496 | |
376 | - A new `geopandas.clip` function to clip a GeoDataFrame to the spatial extent | |
497 | - A new ``geopandas.clip`` function to clip a GeoDataFrame to the spatial extent | |
377 | 498 | of another shape (#1128). |
378 | - The `geopandas.overlay` function now works for all geometry types, including | |
499 | - The ``geopandas.overlay`` function now works for all geometry types, including | |
379 | 500 | points and linestrings in addition to polygons (#1110). |
380 | - The `plot()` method gained support for missing values (in the column that | |
501 | - The ``plot()`` method gained support for missing values (in the column that | |
381 | 502 | determines the colors). By default it doesn't plot the corresponding |
382 | geometries, but using the new `missing_kwds` argument you can specify how to | |
503 | geometries, but using the new ``missing_kwds`` argument you can specify how to | |
383 | 504 | style those geometries (#1156). |
384 | - The `plot()` method now also supports plotting GeometryCollection and | |
505 | - The ``plot()`` method now also supports plotting GeometryCollection and | |
385 | 506 | LinearRing objects (#1225). |
386 | 507 | - Added support for filtering with a geometry or reading a subset of the rows in |
387 | `geopandas.read_file` (#1160). | |
508 | ``geopandas.read_file`` (#1160). | |
388 | 509 | - Added support for the new nullable integer data type of pandas in |
389 | `GeoDataFrame.to_file` (#1220). | |
390 | ||
391 | Bug fixes: | |
392 | ||
393 | - `GeoSeries.reset_index()` now correctly results in a GeoDataFrame instead of DataFrame (#1252). | |
394 | - Fixed the `geopandas.sjoin` function to handle MultiIndex correctly (#1159). | |
395 | - Fixed the `geopandas.sjoin` function to preserve the index name of the left GeoDataFrame (#1150). | |
510 | ``GeoDataFrame.to_file`` (#1220). | |
511 | ||
512 | Bug fixes: | |
513 | ||
514 | - ``GeoSeries.reset_index()`` now correctly results in a GeoDataFrame instead of DataFrame (#1252). | |
515 | - Fixed the ``geopandas.sjoin`` function to handle MultiIndex correctly (#1159). | |
516 | - Fixed the ``geopandas.sjoin`` function to preserve the index name of the left GeoDataFrame (#1150). | |
396 | 517 | |
397 | 518 | |
398 | 519 | Version 0.6.3 (February 6, 2020) |
401 | 522 | Small bug-fix release: |
402 | 523 | |
403 | 524 | - Compatibility with Shapely 1.7 and pandas 1.0 (#1244). |
404 | - Fix `GeoDataFrame.fillna` to accept non-geometry values again when there are | |
525 | - Fix ``GeoDataFrame.fillna`` to accept non-geometry values again when there are | |
405 | 526 | no missing values in the geometry column. This should make it easier to fill |
406 | 527 | the numerical columns of the GeoDataFrame (#1279). |
407 | 528 | |
424 | 545 | |
425 | 546 | Small bug-fix release fixing a few regressions: |
426 | 547 | |
427 | - Fix `astype` when converting to string with Multi geometries (#1145) or when converting a dataframe without geometries (#1144). | |
428 | - Fix `GeoSeries.fillna` to accept `np.nan` again (#1149). | |
548 | - Fix ``astype`` when converting to string with Multi geometries (#1145) or when converting a dataframe without geometries (#1144). | |
549 | - Fix ``GeoSeries.fillna`` to accept ``np.nan`` again (#1149). | |
429 | 550 | |
430 | 551 | |
431 | 552 | Version 0.6.0 (September 27, 2019) |
437 | 558 | |
438 | 559 | - A refactor of the internals based on the pandas ExtensionArray interface (#1000). The main user visible changes are: |
439 | 560 | |
440 | - The `.dtype` of a GeoSeries is now a `'geometry'` dtype (and no longer a numpy `object` dtype). | |
441 | - The `.values` of a GeoSeries now returns a custom `GeometryArray`, and no longer a numpy array. To get back a numpy array of Shapely scalars, you can convert explicitly using `np.asarray(..)`. | |
442 | ||
443 | - The `GeoSeries` constructor now raises a warning when passed non-geometry data. Currently the constructor falls back to return a pandas `Series`, but in the future this will raise an error (#1085). | |
561 | - The ``.dtype`` of a GeoSeries is now a ``'geometry'`` dtype (and no longer a numpy ``object`` dtype). | |
562 | - The ``.values`` of a GeoSeries now returns a custom ``GeometryArray``, and no longer a numpy array. To get back a numpy array of Shapely scalars, you can convert explicitly using ``np.asarray(..)``. | |
563 | ||
564 | - The ``GeoSeries`` constructor now raises a warning when passed non-geometry data. Currently the constructor falls back to return a pandas ``Series``, but in the future this will raise an error (#1085). | |
444 | 565 | - The missing value handling has been changed to now separate the concepts of missing geometries and empty geometries (#601, #1062). In practice this means that (see [the docs](https://geopandas.readthedocs.io/en/v0.6.0/missing_empty.html) for more details): |
445 | 566 | |
446 | - `GeoSeries.isna` now considers only missing values, and if you want to check for empty geometries, you can use `GeoSeries.is_empty` (`GeoDataFrame.isna` already only looked at missing values). | |
447 | - `GeoSeries.dropna` now actually drops missing values (before it didn't drop either missing or empty geometries) | |
448 | - `GeoSeries.fillna` only fills missing values (behaviour unchanged). | |
449 | - `GeoSeries.align` uses missing values instead of empty geometries by default to fill non-matching index entries. | |
567 | - ``GeoSeries.isna`` now considers only missing values, and if you want to check for empty geometries, you can use ``GeoSeries.is_empty`` (``GeoDataFrame.isna`` already only looked at missing values). | |
568 | - ``GeoSeries.dropna`` now actually drops missing values (before it didn't drop either missing or empty geometries) | |
569 | - ``GeoSeries.fillna`` only fills missing values (behaviour unchanged). | |
570 | - ``GeoSeries.align`` uses missing values instead of empty geometries by default to fill non-matching index entries. | |
450 | 571 | |
451 | 572 | New features and improvements: |
452 | 573 | |
453 | - Addition of a `GeoSeries.affine_transform` method, equivalent of Shapely's function (#1008). | |
454 | - Addition of a `GeoDataFrame.rename_geometry` method to easily rename the active geometry column (#1053). | |
455 | - Addition of `geopandas.show_versions()` function, which can be used to give an overview of the installed libraries in bug reports (#899). | |
456 | - The `legend_kwds` keyword of the `plot()` method can now also be used to specify keywords for the color bar (#1102). | |
457 | - Performance improvement in the `sjoin()` operation by re-using existing spatial index of the input dataframes, if available (#789). | |
574 | - Addition of a ``GeoSeries.affine_transform`` method, equivalent of Shapely's function (#1008). | |
575 | - Addition of a ``GeoDataFrame.rename_geometry`` method to easily rename the active geometry column (#1053). | |
576 | - Addition of ``geopandas.show_versions()`` function, which can be used to give an overview of the installed libraries in bug reports (#899). | |
577 | - The ``legend_kwds`` keyword of the ``plot()`` method can now also be used to specify keywords for the color bar (#1102). | |
578 | - Performance improvement in the ``sjoin()`` operation by re-using existing spatial index of the input dataframes, if available (#789). | |
458 | 579 | - Updated documentation to work with latest version of geoplot and contextily (#1044, #1088). |
459 | 580 | - A new ``geopandas.options`` configuration, with currently a single option to control the display precision of the coordinates (``options.display_precision``). The default is now to show less coordinates (3 for projected and 5 for geographic coordinates), but the default can be overridden with the option. |
460 | 581 | |
461 | 582 | Bug fixes: |
462 | 583 | |
463 | - Also try to use `pysal` instead of `mapclassify` if available (#1082). | |
464 | - The `GeoDataFrame.astype()` method now correctly returns a `GeoDataFrame` if the geometry column is preserved (#1009). | |
465 | - The `to_crs` method now uses `always_xy=True` to ensure correct lon/lat order handling for pyproj>=2.2.0 (#1122). | |
466 | - Fixed passing list-like colors in the `plot()` method in case of "multi" geometries (#1119). | |
467 | - Fixed the coloring of shapes and colorbar when passing a custom `norm` in the `plot()` method (#1091, #1089). | |
468 | - Fixed `GeoDataFrame.to_file` to preserve VFS file paths (e.g. when a "s3://" path is specified) (#1124). | |
584 | - Also try to use ``pysal`` instead of ``mapclassify`` if available (#1082). | |
585 | - The ``GeoDataFrame.astype()`` method now correctly returns a ``GeoDataFrame`` if the geometry column is preserved (#1009). | |
586 | - The ``to_crs`` method now uses ``always_xy=True`` to ensure correct lon/lat order handling for pyproj>=2.2.0 (#1122). | |
587 | - Fixed passing list-like colors in the ``plot()`` method in case of "multi" geometries (#1119). | |
588 | - Fixed the coloring of shapes and colorbar when passing a custom ``norm`` in the ``plot()`` method (#1091, #1089). | |
589 | - Fixed ``GeoDataFrame.to_file`` to preserve VFS file paths (e.g. when a "s3://" path is specified) (#1124). | |
469 | 590 | - Fixed failing case in ``geopandas.sjoin`` with empty geometries (#1138). |
470 | 591 | |
471 | 592 | |
482 | 603 | |
483 | 604 | Improvements: |
484 | 605 | |
485 | * Significant performance improvement (around 10x) for `GeoDataFrame.iterfeatures`, | |
486 | which also improves `GeoDataFrame.to_file` (#864). | |
487 | * File IO enhancements based on Fiona 1.8: | |
488 | ||
489 | * Support for writing bool dtype (#855) and datetime dtype, if the file format supports it (#728). | |
490 | * Support for writing dataframes with multiple geometry types, if the file format allows it (e.g. GeoJSON for all types, or ESRI Shapefile for Polygon+MultiPolygon) (#827, #867, #870). | |
491 | ||
492 | * Compatibility with pyproj >= 2 (#962). | |
493 | * A new `geopandas.points_from_xy()` helper function to convert x and y coordinates to Point objects (#896). | |
494 | * The `buffer` and `interpolate` methods now accept an array-like to specify a variable distance for each geometry (#781). | |
495 | * Addition of a `relate` method, corresponding to the shapely method that returns the DE-9IM matrix (#853). | |
496 | * Plotting improvements: | |
497 | ||
498 | * Performance improvement in plotting by only flattening the geometries if there are actually 'Multi' geometries (#785). | |
499 | * Choropleths: access to all `mapclassify` classification schemes and addition of the `classification_kwds` keyword in the `plot` method to specify options for the scheme (#876). | |
500 | * Ability to specify a matplotlib axes object on which to plot the color bar with the `cax` keyword, in order to have more control over the color bar placement (#894). | |
501 | ||
502 | * Changed the default provider in ``geopandas.tools.geocode`` from Google (now requires an API key) to Geocode.Farm (#907, #975). | |
606 | - Significant performance improvement (around 10x) for ``GeoDataFrame.iterfeatures``, | |
607 | which also improves ``GeoDataFrame.to_file`` (#864). | |
608 | - File IO enhancements based on Fiona 1.8: | |
609 | ||
610 | - Support for writing bool dtype (#855) and datetime dtype, if the file format supports it (#728). | |
611 | - Support for writing dataframes with multiple geometry types, if the file format allows it (e.g. GeoJSON for all types, or ESRI Shapefile for Polygon+MultiPolygon) (#827, #867, #870). | |
612 | ||
613 | - Compatibility with pyproj >= 2 (#962). | |
614 | - A new ``geopandas.points_from_xy()`` helper function to convert x and y coordinates to Point objects (#896). | |
615 | - The ``buffer`` and ``interpolate`` methods now accept an array-like to specify a variable distance for each geometry (#781). | |
616 | - Addition of a ``relate`` method, corresponding to the shapely method that returns the DE-9IM matrix (#853). | |
617 | - Plotting improvements: | |
618 | ||
619 | - Performance improvement in plotting by only flattening the geometries if there are actually 'Multi' geometries (#785). | |
620 | - Choropleths: access to all ``mapclassify`` classification schemes and addition of the ``classification_kwds`` keyword in the ``plot`` method to specify options for the scheme (#876). | |
621 | - Ability to specify a matplotlib axes object on which to plot the color bar with the ``cax`` keyword, in order to have more control over the color bar placement (#894). | |
622 | ||
623 | - Changed the default provider in ``geopandas.tools.geocode`` from Google (now requires an API key) to Geocode.Farm (#907, #975). | |
503 | 624 | |
504 | 625 | Bug fixes: |
505 | 626 | |
506 | 627 | - Remove the edge in the legend marker (#807). |
507 | - Fix the `align` method to preserve the CRS (#829). | |
508 | - Fix `geopandas.testing.assert_geodataframe_equal` to correctly compare left and right dataframes (#810). | |
628 | - Fix the ``align`` method to preserve the CRS (#829). | |
629 | - Fix ``geopandas.testing.assert_geodataframe_equal`` to correctly compare left and right dataframes (#810). | |
509 | 630 | - Fix in choropleth mapping when the values contain missing values (#877). |
510 | - Better error message in `sjoin` if the input is not a GeoDataFrame (#842). | |
511 | - Fix in `read_postgis` to handle nullable (missing) geometries (#856). | |
512 | - Correctly passing through the `parse_dates` keyword in `read_postgis` to the underlying pandas method (#860). | |
631 | - Better error message in ``sjoin`` if the input is not a GeoDataFrame (#842). | |
632 | - Fix in ``read_postgis`` to handle nullable (missing) geometries (#856). | |
633 | - Correctly passing through the ``parse_dates`` keyword in ``read_postgis`` to the underlying pandas method (#860). | |
513 | 634 | - Fixed the shape of Antarctica in the included demo dataset 'naturalearth_lowres' |
514 | 635 | (by updating to the latest version) (#804). |
515 | 636 | |
516 | ||
517 | 637 | Version 0.4.1 (March 5, 2019) |
518 | 638 | ----------------------------- |
519 | 639 | |
520 | 640 | Small bug-fix release for compatibility with the latest Fiona and PySAL |
521 | 641 | releases: |
522 | 642 | |
523 | * Compatibility with Fiona 1.8: fix deprecation warning (#854). | |
524 | * Compatibility with PySAL 2.0: switched to `mapclassify` instead of `PySAL` as | |
525 | dependency for choropleth mapping with the `scheme` keyword (#872). | |
526 | * Fix for new `overlay` implementation in case the intersection is empty (#800). | |
527 | ||
643 | - Compatibility with Fiona 1.8: fix deprecation warning (#854). | |
644 | - Compatibility with PySAL 2.0: switched to ``mapclassify`` instead of ``PySAL`` as | |
645 | dependency for choropleth mapping with the ``scheme`` keyword (#872). | |
646 | - Fix for new ``overlay`` implementation in case the intersection is empty (#800). | |
528 | 647 | |
529 | 648 | Version 0.4.0 (July 15, 2018) |
530 | 649 | ----------------------------- |
531 | 650 | |
532 | 651 | Improvements: |
533 | 652 | |
534 | * Improved `overlay` function (better performance, several incorrect behaviours fixed) (#429) | |
535 | * Pass keywords to control legend behavior (`legend_kwds`) to `plot` (#434) | |
536 | * Add basic support for reading remote datasets in `read_file` (#531) | |
537 | * Pass kwargs for `buffer` operation on GeoSeries (#535) | |
538 | * Expose all geopy services as options in geocoding (#550) | |
539 | * Faster write speeds to GeoPackage (#605) | |
540 | * Permit `read_file` filtering with a bounding box from a GeoDataFrame (#613) | |
541 | * Set CRS on GeoDataFrame returned by `read_postgis` (#627) | |
542 | * Permit setting markersize for Point GeoSeries plots with column values (#633) | |
543 | * Started an example gallery (#463, #690, #717) | |
544 | * Support for plotting MultiPoints (#683) | |
545 | * Testing functionality (e.g. `assert_geodataframe_equal`) is now publicly exposed (#707) | |
546 | * Add `explode` method to GeoDataFrame (similar to the GeoSeries method) (#671) | |
547 | * Set equal aspect on active axis on multi-axis figures (#718) | |
548 | * Pass array of values to column argument in `plot` (#770) | |
549 | ||
550 | Bug fixes: | |
551 | ||
552 | * Ensure that colorbars are plotted on the correct axis (#523) | |
553 | * Handle plotting empty GeoDataFrame (#571) | |
554 | * Save z-dimension when writing files (#652) | |
555 | * Handle reading empty shapefiles (#653) | |
556 | * Correct dtype for empty result of spatial operations (#685) | |
557 | * Fix empty `sjoin` handling for pandas>=0.23 (#762) | |
558 | ||
653 | - Improved ``overlay`` function (better performance, several incorrect behaviours fixed) (#429) | |
654 | - Pass keywords to control legend behavior (``legend_kwds``) to ``plot`` (#434) | |
655 | - Add basic support for reading remote datasets in ``read_file`` (#531) | |
656 | - Pass kwargs for ``buffer`` operation on GeoSeries (#535) | |
657 | - Expose all geopy services as options in geocoding (#550) | |
658 | - Faster write speeds to GeoPackage (#605) | |
659 | - Permit ``read_file`` filtering with a bounding box from a GeoDataFrame (#613) | |
660 | - Set CRS on GeoDataFrame returned by ``read_postgis`` (#627) | |
661 | - Permit setting markersize for Point GeoSeries plots with column values (#633) | |
662 | - Started an example gallery (#463, #690, #717) | |
663 | - Support for plotting MultiPoints (#683) | |
664 | - Testing functionality (e.g. ``assert_geodataframe_equal``) is now publicly exposed (#707) | |
665 | - Add ``explode`` method to GeoDataFrame (similar to the GeoSeries method) (#671) | |
666 | - Set equal aspect on active axis on multi-axis figures (#718) | |
667 | - Pass array of values to column argument in ``plot`` (#770) | |
668 | ||
669 | Bug fixes: | |
670 | ||
671 | - Ensure that colorbars are plotted on the correct axis (#523) | |
672 | - Handle plotting empty GeoDataFrame (#571) | |
673 | - Save z-dimension when writing files (#652) | |
674 | - Handle reading empty shapefiles (#653) | |
675 | - Correct dtype for empty result of spatial operations (#685) | |
676 | - Fix empty ``sjoin`` handling for pandas>=0.23 (#762) | |
559 | 677 | |
560 | 678 | Version 0.3.0 (August 29, 2017) |
561 | 679 | ------------------------------- |
562 | 680 | |
563 | 681 | Improvements: |
564 | 682 | |
565 | * Improve plotting performance using ``matplotlib.collections`` (#267) | |
566 | * Improve default plotting appearance. The defaults now follow the new matplotlib defaults (#318, #502, #510) | |
567 | * Provide access to x/y coordinates as attributes for Point GeoSeries (#383) | |
568 | * Make the NYBB dataset available through ``geopandas.datasets`` (#384) | |
569 | * Enable ``sjoin`` on non-integer-index GeoDataFrames (#422) | |
570 | * Add ``cx`` indexer to GeoDataFrame (#482) | |
571 | * ``GeoDataFrame.from_features`` now also accepts a Feature Collection (#225, #507) | |
572 | * Use index label instead of integer id in output of ``iterfeatures`` and | |
683 | - Improve plotting performance using ``matplotlib.collections`` (#267) | |
684 | - Improve default plotting appearance. The defaults now follow the new matplotlib defaults (#318, #502, #510) | |
685 | - Provide access to x/y coordinates as attributes for Point GeoSeries (#383) | |
686 | - Make the NYBB dataset available through ``geopandas.datasets`` (#384) | |
687 | - Enable ``sjoin`` on non-integer-index GeoDataFrames (#422) | |
688 | - Add ``cx`` indexer to GeoDataFrame (#482) | |
689 | - ``GeoDataFrame.from_features`` now also accepts a Feature Collection (#225, #507) | |
690 | - Use index label instead of integer id in output of ``iterfeatures`` and | |
573 | 691 | ``to_json`` (#421) |
574 | * Return empty data frame rather than raising an error when performing a spatial join with non overlapping geodataframes (#335) | |
575 | ||
576 | Bug fixes: | |
577 | ||
578 | * Compatibility with shapely 1.6.0 (#512) | |
579 | * Fix ``fiona.filter`` results when bbox is not None (#372) | |
580 | * Fix ``dissolve`` to retain CRS (#389) | |
581 | * Fix ``cx`` behavior when using index of 0 (#478) | |
582 | * Fix display of lower bin in legend label of choropleth plots using a PySAL scheme (#450) | |
583 | ||
692 | - Return empty data frame rather than raising an error when performing a spatial join with non overlapping geodataframes (#335) | |
693 | ||
694 | Bug fixes: | |
695 | ||
696 | - Compatibility with shapely 1.6.0 (#512) | |
697 | - Fix ``fiona.filter`` results when bbox is not None (#372) | |
698 | - Fix ``dissolve`` to retain CRS (#389) | |
699 | - Fix ``cx`` behavior when using index of 0 (#478) | |
700 | - Fix display of lower bin in legend label of choropleth plots using a PySAL scheme (#450) | |
584 | 701 | |
585 | 702 | Version 0.2.0 |
586 | 703 | ------------- |
587 | 704 | |
588 | 705 | Improvements: |
589 | 706 | |
590 | * Complete overhaul of the documentation | |
591 | * Addition of ``overlay`` to perform spatial overlays with polygons (#142) | |
592 | * Addition of ``sjoin`` to perform spatial joins (#115, #145, #188) | |
593 | * Addition of ``__geo_interface__`` that returns a python data structure | |
707 | - Complete overhaul of the documentation | |
708 | - Addition of ``overlay`` to perform spatial overlays with polygons (#142) | |
709 | - Addition of ``sjoin`` to perform spatial joins (#115, #145, #188) | |
710 | - Addition of ``__geo_interface__`` that returns a python data structure | |
594 | 711 | to represent the ``GeoSeries`` as a GeoJSON-like ``FeatureCollection`` (#116) |
595 | 712 | and ``iterfeatures`` method (#178) |
596 | * Addition of the ``explode`` (#146) and ``dissolve`` (#310, #311) methods. | |
597 | * Addition of the ``sindex`` attribute, a Spatial Index using the optional | |
713 | - Addition of the ``explode`` (#146) and ``dissolve`` (#310, #311) methods. | |
714 | - Addition of the ``sindex`` attribute, a Spatial Index using the optional | |
598 | 715 | dependency ``rtree`` (``libspatialindex``) that can be used to speed up |
599 | 716 | certain operations such as overlays (#140, #141). |
600 | * Addition of the ``GeoSeries.cx`` coordinate indexer to slice a GeoSeries based | |
717 | - Addition of the ``GeoSeries.cx`` coordinate indexer to slice a GeoSeries based | |
601 | 718 | on a bounding box of the coordinates (#55). |
602 | * Improvements to plotting: ability to specify edge colors (#173), support for | |
719 | - Improvements to plotting: ability to specify edge colors (#173), support for | |
603 | 720 | the ``vmin``, ``vmax``, ``figsize``, ``linewidth`` keywords (#207), legends |
604 | 721 | for chloropleth plots (#210), color points by specifying a colormap (#186) or |
605 | 722 | a single color (#238). |
606 | * Larger flexibility of ``to_crs``, accepting both dicts and proj strings (#289) | |
607 | * Addition of embedded example data, accessible through | |
723 | - Larger flexibility of ``to_crs``, accepting both dicts and proj strings (#289) | |
724 | - Addition of embedded example data, accessible through | |
608 | 725 | ``geopandas.datasets.get_path``. |
609 | 726 | |
610 | 727 | API changes: |
611 | 728 | |
612 | * In the ``plot`` method, the ``axes`` keyword is renamed to ``ax`` for | |
729 | - In the ``plot`` method, the ``axes`` keyword is renamed to ``ax`` for | |
613 | 730 | consistency with pandas, and the ``colormap`` keyword is renamed to ``cmap`` |
614 | 731 | for consistency with matplotlib (#208, #228, #240). |
615 | 732 | |
616 | 733 | Bug fixes: |
617 | 734 | |
618 | * Properly handle rows with missing geometries (#139, #193). | |
619 | * Fix ``GeoSeries.to_json`` (#263). | |
620 | * Correctly serialize metadata when pickling (#199, #206). | |
621 | * Fix ``merge`` and ``concat`` to return correct GeoDataFrame (#247, #320, #322). | |
735 | - Properly handle rows with missing geometries (#139, #193). | |
736 | - Fix ``GeoSeries.to_json`` (#263). | |
737 | - Correctly serialize metadata when pickling (#199, #206). | |
738 | - Fix ``merge`` and ``concat`` to return correct GeoDataFrame (#247, #320, #322). |
11 | 11 | In general, GeoPandas follows the conventions of the pandas project |
12 | 12 | where applicable. Please read the [contributing |
13 | 13 | guidelines](https://geopandas.readthedocs.io/en/latest/community/contributing.html). |
14 | ||
15 | 14 | |
16 | 15 | In particular, when submitting a pull request: |
17 | 16 | |
38 | 37 | Style |
39 | 38 | ----- |
40 | 39 | |
41 | - GeoPandas supports Python 3.7+ only. The last version of GeoPandas | |
40 | - GeoPandas supports Python 3.8+ only. The last version of GeoPandas | |
42 | 41 | supporting Python 2 is 0.6. |
43 | 42 | |
44 | 43 | - GeoPandas follows [the PEP 8 |
0 | GeoPandas [![Actions Status](https://github.com/geopandas/geopandas/workflows/Tests/badge.svg)](https://github.com/geopandas/geopandas/actions?query=workflow%3ATests) [![Coverage Status](https://codecov.io/gh/geopandas/geopandas/branch/master/graph/badge.svg)](https://codecov.io/gh/geopandas/geopandas) [![Join the chat at https://gitter.im/geopandas/geopandas](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/geopandas/geopandas?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/geopandas/geopandas/master) [![DOI](https://zenodo.org/badge/11002815.svg)](https://zenodo.org/badge/latestdoi/11002815) | |
1 | ========= | |
0 | [![pypi](https://img.shields.io/pypi/v/geopandas.svg)](https://pypi.python.org/pypi/geopandas/) | |
1 | [![Actions Status](https://github.com/geopandas/geopandas/workflows/Tests/badge.svg)](https://github.com/geopandas/geopandas/actions?query=workflow%3ATests) | |
2 | [![Coverage Status](https://codecov.io/gh/geopandas/geopandas/branch/main/graph/badge.svg)](https://codecov.io/gh/geopandas/geopandas) | |
3 | [![Join the chat at https://gitter.im/geopandas/geopandas](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/geopandas/geopandas?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | |
4 | [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/geopandas/geopandas/main) | |
5 | [![DOI](https://zenodo.org/badge/11002815.svg)](https://zenodo.org/badge/latestdoi/11002815) | |
6 | ||
7 | GeoPandas | |
8 | --------- | |
2 | 9 | |
3 | 10 | Python tools for geographic data |
4 | 11 | |
34 | 41 | - ``shapely`` |
35 | 42 | - ``fiona`` |
36 | 43 | - ``pyproj`` |
44 | - ``packaging`` | |
37 | 45 | |
38 | 46 | Further, ``matplotlib`` is an optional dependency, required |
39 | 47 | for plotting, and [``rtree``](https://github.com/Toblerity/rtree) is an optional |
25 | 25 | |
26 | 26 | self.df1, self.df2 = df1, df2 |
27 | 27 | |
28 | def time_sjoin(self, op): | |
29 | sjoin(self.df1, self.df2, op=op) | |
28 | def time_sjoin(self, predicate): | |
29 | sjoin(self.df1, self.df2, predicate=predicate) |
0 | name: test | |
1 | channels: | |
2 | - conda-forge | |
3 | dependencies: | |
4 | - python=3.10 | |
5 | - cython | |
6 | # required | |
7 | - shapely | |
8 | - fiona | |
9 | - pyproj | |
10 | - geos | |
11 | - packaging | |
12 | # testing | |
13 | - pytest | |
14 | - pytest-cov | |
15 | - pytest-xdist | |
16 | - fsspec | |
17 | # optional | |
18 | - rtree | |
19 | #- geopy | |
20 | - SQLalchemy | |
21 | - libspatialite | |
22 | - pyarrow | |
23 | - pip | |
24 | - pip: | |
25 | - geopy | |
26 | - mapclassify>=2.4.0 | |
27 | # dev versions of packages | |
28 | - --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple | |
29 | - numpy | |
30 | - git+https://github.com/pandas-dev/pandas.git@main | |
31 | - git+https://github.com/matplotlib/matplotlib.git@main | |
32 | # - git+https://github.com/Toblerity/Shapely.git@main | |
33 | - git+https://github.com/pygeos/pygeos.git@master | |
34 | - git+https://github.com/python-visualization/folium.git@main | |
35 | - git+https://github.com/geopandas/xyzservices.git@main |
0 | name: test | |
1 | channels: | |
2 | - conda-forge | |
3 | dependencies: | |
4 | - python=3.10 | |
5 | # required | |
6 | - pandas | |
7 | - shapely | |
8 | - fiona | |
9 | - pyproj | |
10 | - pygeos | |
11 | - packaging | |
12 | # testing | |
13 | - pytest | |
14 | - pytest-cov | |
15 | - pytest-xdist | |
16 | - fsspec | |
17 | # optional | |
18 | - rtree | |
19 | - matplotlib-base | |
20 | - mapclassify | |
21 | - folium | |
22 | - xyzservices | |
23 | - scipy | |
24 | - geopy | |
25 | # installed in tests.yaml, because not available on windows | |
26 | # - postgis | |
27 | - SQLalchemy | |
28 | - psycopg2 | |
29 | - libspatialite | |
30 | - geoalchemy2 | |
31 | - pyarrow | |
32 | # - pyogrio | |
33 | # doctest testing | |
34 | - pytest-doctestplus | |
35 | - pip | |
36 | - pip: | |
37 | - git+https://github.com/geopandas/pyogrio.git@main |
0 | name: test | |
1 | channels: | |
2 | - conda-forge | |
3 | dependencies: | |
4 | - python=3.7 | |
5 | # required | |
6 | - pandas | |
7 | - shapely | |
8 | - fiona | |
9 | - pyproj | |
10 | - pygeos | |
11 | # testing | |
12 | - pytest | |
13 | - pytest-cov | |
14 | - pytest-xdist | |
15 | - fsspec | |
16 | # optional | |
17 | - rtree | |
18 | - matplotlib | |
19 | - mapclassify | |
20 | - folium | |
21 | - xyzservices | |
22 | - scipy | |
23 | - geopy | |
24 | - SQLalchemy | |
25 | - libspatialite | |
26 | - pyarrow | |
27 |
0 | name: test | |
1 | channels: | |
2 | - defaults | |
3 | dependencies: | |
4 | - python=3.7 | |
5 | # required | |
6 | - pandas | |
7 | - shapely | |
8 | - fiona | |
9 | - pyproj | |
10 | - geos | |
11 | # testing | |
12 | - pytest | |
13 | - pytest-cov | |
14 | - pytest-xdist | |
15 | - fsspec | |
16 | # optional | |
17 | - rtree | |
18 | - matplotlib | |
19 | #- geopy | |
20 | - SQLalchemy | |
21 | - libspatialite | |
22 | - pip: | |
23 | - geopy | |
24 | - mapclassify | |
25 | - pyarrow | |
26 | - folium | |
27 | - xyzservices |
0 | name: test | |
1 | channels: | |
2 | - defaults | |
3 | - conda-forge | |
4 | dependencies: | |
5 | - python=3.7 | |
6 | # required | |
7 | - numpy=1.18 | |
8 | - pandas==0.25 | |
9 | - shapely=1.6 | |
10 | - fiona=1.8.13 | |
11 | #- pyproj | |
12 | # testing | |
13 | - pytest | |
14 | - pytest-cov | |
15 | - pytest-xdist | |
16 | - fsspec | |
17 | # optional | |
18 | - rtree | |
19 | - matplotlib | |
20 | - matplotlib=3.1 | |
21 | # - mapclassify=2.4.0 - doesn't build due to conflicts | |
22 | - geopy | |
23 | - SQLalchemy | |
24 | - libspatialite | |
25 | - pyarrow | |
26 | - pip: | |
27 | - pyproj==2.2.2 |
0 | name: test | |
1 | channels: | |
2 | - defaults | |
3 | dependencies: | |
4 | - python=3.7 | |
5 | # required | |
6 | - pandas=1.0 | |
7 | - shapely | |
8 | - fiona | |
9 | - numpy=<1.19 | |
10 | #- pyproj | |
11 | - geos | |
12 | # testing | |
13 | - pytest | |
14 | - pytest-cov | |
15 | - pytest-xdist | |
16 | - fsspec | |
17 | # optional | |
18 | - rtree | |
19 | - matplotlib | |
20 | #- geopy | |
21 | - SQLalchemy | |
22 | - libspatialite | |
23 | - pip | |
24 | - pip: | |
25 | - pyproj==3.0.1 | |
26 | - geopy | |
27 | - mapclassify==2.4.0 | |
28 | - pyarrow |
0 | name: test | |
1 | channels: | |
2 | - conda-forge | |
3 | dependencies: | |
4 | - python=3.8 | |
5 | - cython | |
6 | # required | |
7 | - fiona | |
8 | - pyproj | |
9 | - geos | |
10 | # testing | |
11 | - pytest | |
12 | - pytest-cov | |
13 | - pytest-xdist | |
14 | - fsspec | |
15 | # optional | |
16 | - rtree | |
17 | #- geopy | |
18 | - SQLalchemy | |
19 | - libspatialite | |
20 | - pyarrow | |
21 | - pip: | |
22 | - geopy | |
23 | - mapclassify>=2.4.0 | |
24 | # dev versions of packages | |
25 | - git+https://github.com/numpy/numpy.git@main | |
26 | - git+https://github.com/pydata/pandas.git@master | |
27 | - git+https://github.com/matplotlib/matplotlib.git@master | |
28 | - git+https://github.com/Toblerity/Shapely.git@master | |
29 | - git+https://github.com/pygeos/pygeos.git@master | |
30 | - git+https://github.com/python-visualization/folium.git@master | |
31 | - git+https://github.com/geopandas/xyzservices.git@main | |
32 |
3 | 3 | dependencies: |
4 | 4 | - python=3.8 |
5 | 5 | # required |
6 | - pandas=1.3.2 # temporary pin because 1.3.3 has regression for overlay (GH2101) | |
6 | - pandas | |
7 | 7 | - shapely |
8 | - fiona | |
8 | # - fiona # build with only pyogrio | |
9 | - libgdal | |
9 | 10 | - pyproj |
10 | 11 | - pygeos |
12 | - packaging | |
11 | 13 | # testing |
12 | 14 | - pytest |
13 | 15 | - pytest-cov |
14 | 16 | - pytest-xdist |
15 | - fsspec | |
17 | # - fsspec # to have one non-minimal build without fsspec | |
16 | 18 | # optional |
17 | 19 | - rtree |
18 | 20 | - matplotlib |
21 | 23 | - xyzservices |
22 | 24 | - scipy |
23 | 25 | - geopy |
24 | # installed in tests.yaml, because not available on windows | |
25 | # - postgis | |
26 | 26 | - SQLalchemy |
27 | - psycopg2 | |
28 | 27 | - libspatialite |
29 | - geoalchemy2 | |
30 | 28 | - pyarrow |
31 | # doctest testing | |
32 | - pytest-doctestplus⏎ | |
29 | - pip | |
30 | - pip: | |
31 | - pyogrio |
0 | name: test | |
1 | channels: | |
2 | - defaults | |
3 | dependencies: | |
4 | - python=3.8 | |
5 | # required | |
6 | - pandas | |
7 | - shapely | |
8 | - fiona | |
9 | - pyproj | |
10 | - geos | |
11 | - packaging | |
12 | # testing | |
13 | - pytest | |
14 | - pytest-cov | |
15 | - pytest-xdist | |
16 | - fsspec | |
17 | # optional | |
18 | - rtree | |
19 | - matplotlib | |
20 | #- geopy | |
21 | - SQLalchemy | |
22 | - libspatialite | |
23 | - pip | |
24 | - pip: | |
25 | - geopy | |
26 | - mapclassify | |
27 | - pyarrow | |
28 | - folium | |
29 | - xyzservices |
0 | name: test | |
1 | channels: | |
2 | - defaults | |
3 | - conda-forge | |
4 | dependencies: | |
5 | - python=3.8 | |
6 | # required | |
7 | - numpy=1.18 | |
8 | - pandas==1.0.5 | |
9 | - shapely=1.7 | |
10 | - fiona=1.8.13.post1 | |
11 | - pyproj=2.6.1.post1 | |
12 | - packaging | |
13 | #- pyproj | |
14 | # testing | |
15 | - pytest | |
16 | - pytest-cov | |
17 | - pytest-xdist | |
18 | - fsspec | |
19 | # optional | |
20 | - rtree | |
21 | - matplotlib | |
22 | - matplotlib=3.2 | |
23 | - mapclassify=2.4.0 | |
24 | - geopy | |
25 | - SQLalchemy | |
26 | - libspatialite | |
27 | - pyarrow |
0 | name: test | |
1 | channels: | |
2 | - conda-forge | |
3 | dependencies: | |
4 | - python=3.8 | |
5 | # required | |
6 | - pandas | |
7 | - shapely | |
8 | - fiona | |
9 | - pyproj | |
10 | # testing | |
11 | - pytest | |
12 | - pytest-cov | |
13 | - pytest-xdist |
0 | name: test | |
1 | channels: | |
2 | - defaults | |
3 | dependencies: | |
4 | - python=3.8 | |
5 | # required | |
6 | - pandas=1.1 | |
7 | - shapely | |
8 | - fiona | |
9 | - numpy=<1.19 | |
10 | - pyproj=3.1.0 | |
11 | #- pyproj | |
12 | - geos | |
13 | - packaging | |
14 | # testing | |
15 | - pytest | |
16 | - pytest-cov | |
17 | - pytest-xdist | |
18 | - fsspec | |
19 | # optional | |
20 | - rtree | |
21 | - matplotlib | |
22 | #- geopy | |
23 | - SQLalchemy | |
24 | - libspatialite | |
25 | - pip | |
26 | - pip: | |
27 | - geopy | |
28 | - mapclassify==2.4.0 | |
29 | - pyarrow |
3 | 3 | dependencies: |
4 | 4 | - python=3.9 |
5 | 5 | # required |
6 | - pandas | |
6 | - pandas=1.3 | |
7 | 7 | - shapely |
8 | 8 | - fiona |
9 | 9 | - pyproj |
10 | 10 | - pygeos |
11 | - packaging | |
11 | 12 | # testing |
12 | 13 | - pytest |
13 | 14 | - pytest-cov |
0 | name: test | |
1 | channels: | |
2 | - conda-forge | |
3 | dependencies: | |
4 | - python=3.9 | |
5 | # required | |
6 | - pandas | |
7 | - shapely | |
8 | - fiona | |
9 | - pyproj | |
10 | - packaging | |
11 | # testing | |
12 | - pytest | |
13 | - pytest-cov | |
14 | - pytest-xdist |
0 | name: test | |
1 | channels: | |
2 | - conda-forge | |
3 | dependencies: | |
4 | - python=3.9 | |
5 | # required | |
6 | - pandas=1.2 | |
7 | - shapely | |
8 | - fiona | |
9 | - pyproj | |
10 | - pygeos | |
11 | - packaging | |
12 | # testing | |
13 | - pytest | |
14 | - pytest-cov | |
15 | - pytest-xdist | |
16 | - fsspec | |
17 | # optional | |
18 | - rtree | |
19 | - matplotlib | |
20 | - mapclassify | |
21 | - folium | |
22 | - xyzservices | |
23 | - scipy | |
24 | - geopy | |
25 | # installed in tests.yaml, because not available on windows | |
26 | # - postgis | |
27 | - SQLalchemy | |
28 | - psycopg2 | |
29 | - libspatialite | |
30 | - geoalchemy2 | |
31 | - pyarrow | |
32 | # doctest testing | |
33 | - pytest-doctestplus |
38 | 38 | - libpysal=4.5.1 |
39 | 39 | - pygeos=0.10.2 |
40 | 40 | - xyzservices=2021.9.1 |
41 | - packaging=21.0 | |
41 | 42 | - pip |
42 | 43 | - pip: |
43 | 44 | - sphinx-toggleprompt |
43 | 43 | |
44 | 44 | ## Download |
45 | 45 | |
46 | You can download all version in SVG and PNG from [GitHub repository](https://github.com/geopandas/geopandas/tree/master/doc/source/_static/logo). | |
46 | You can download all versions in SVG and PNG from [GitHub repository](https://github.com/geopandas/geopandas/tree/main/doc/source/_static/logo). | |
47 | 47 | |
48 | 48 | |
49 | 49 | ## Colors |
20 | 20 | |
21 | 21 | - All existing tests should pass. Please make sure that the test |
22 | 22 | suite passes, both locally and on |
23 | `GitHub Actions <hhttps://github.com/geopandas/geopandas/actions>`_. Status on | |
23 | `GitHub Actions <https://github.com/geopandas/geopandas/actions>`_. Status on | |
24 | 24 | GHA will be visible on a pull request. GHA are automatically enabled |
25 | 25 | on your own fork as well. To trigger a check, make a PR to your own fork. |
26 | 26 | |
43 | 43 | imports when possible, and explicit relative imports for local |
44 | 44 | imports when necessary in tests. |
45 | 45 | |
46 | - GeoPandas supports Python 3.7+ only. The last version of GeoPandas | |
46 | - GeoPandas supports Python 3.8+ only. The last version of GeoPandas | |
47 | 47 | supporting Python 2 is 0.6. |
48 | 48 | |
49 | 49 | |
113 | 113 | Creating a branch |
114 | 114 | ~~~~~~~~~~~~~~~~~~ |
115 | 115 | |
116 | You want your master branch to reflect only production-ready code, so create a | |
116 | You want your main branch to reflect only production-ready code, so create a | |
117 | 117 | feature branch for making your changes. For example:: |
118 | 118 | |
119 | 119 | git branch shiny-new-feature |
128 | 128 | what the branch brings to *GeoPandas*. You can have many shiny-new-features |
129 | 129 | and switch in between them using the git checkout command. |
130 | 130 | |
131 | To update this branch, you need to retrieve the changes from the master branch:: | |
131 | To update this branch, you need to retrieve the changes from the main branch:: | |
132 | 132 | |
133 | 133 | git fetch upstream |
134 | git rebase upstream/master | |
135 | ||
136 | This will replay your commits on top of the latest GeoPandas git master. If this | |
134 | git rebase upstream/main | |
135 | ||
136 | This will replay your commits on top of the latest GeoPandas git main. If this | |
137 | 137 | leads to merge conflicts, you must resolve these before submitting your pull |
138 | 138 | request. If you have uncommitted changes, you will need to ``stash`` them prior |
139 | 139 | to updating. This will effectively store your changes and they can be reapplied |
154 | 154 | - Make sure that you have :ref:`cloned the repository <contributing.forking>` |
155 | 155 | - ``cd`` to the *geopandas* source directory |
156 | 156 | |
157 | Tell conda to create a new environment, named ``geopandas_dev``, or any other name you would like | |
157 | Using the provided environment | |
158 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
159 | ||
160 | *GeoPandas* provides an environment which includes the required dependencies for development. | |
161 | The environment file is located in the top level of the repo and is named ``environment-dev.yml``. | |
162 | You can create this environment by navigating to the *GeoPandas* source directory | |
163 | and running:: | |
164 | ||
165 | conda env create -f environment-dev.yml | |
166 | ||
167 | This will create a new conda environment named ``geopandas_dev``. | |
168 | ||
169 | Creating the environment manually | |
170 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
171 | ||
172 | Alternatively, it is possible to create a development environment manually. To do this, | |
173 | tell conda to create a new environment named ``geopandas_dev``, or any other name you would like | |
158 | 174 | for this environment, by running:: |
159 | 175 | |
160 | 176 | conda create -n geopandas_dev python |
162 | 178 | This will create the new environment, and not touch any of your existing environments, |
163 | 179 | nor any existing python installation. |
164 | 180 | |
181 | Working with the environment | |
182 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
183 | ||
165 | 184 | To work in this environment, you need to ``activate`` it. The instructions below |
166 | 185 | should work for both Windows, Mac and Linux:: |
167 | 186 | |
182 | 201 | |
183 | 202 | At this point you can easily do a *development* install, as detailed in the next sections. |
184 | 203 | |
204 | ||
185 | 205 | 3) Installing Dependencies |
186 | 206 | -------------------------- |
187 | 207 | |
188 | 208 | To run *GeoPandas* in a development environment, you must first install |
189 | *GeoPandas*'s dependencies. We suggest doing so using the following commands | |
190 | (executed after your development environment has been activated):: | |
209 | *GeoPandas*'s dependencies. If you used the provided environment in section 2, skip this | |
210 | step and continue to section 4. If you created the environment manually, we suggest installing | |
211 | dependencies using the following commands (executed after your development environment has been activated):: | |
191 | 212 | |
192 | 213 | conda install -c conda-forge pandas fiona shapely pyproj rtree pytest |
193 | 214 | |
252 | 273 | <http://www.sphinx-doc.org/en/stable/rest.html#rst-primer>`_ and MyST syntax for ``md`` |
253 | 274 | files `explained here <https://myst-parser.readthedocs.io/en/latest/index.html>`_. |
254 | 275 | The docstrings follow the `Numpy Docstring standard |
255 | <https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt>`_. Some pages | |
276 | <https://github.com/numpy/numpy/blob/main/doc/HOWTO_DOCUMENT.rst.txt>`_. Some pages | |
256 | 277 | and examples are Jupyter notebooks converted to docs using `nbsphinx |
257 | 278 | <https://nbsphinx.readthedocs.io/>`_. Jupyter notebooks should be stored without the output. |
258 | 279 | |
307 | 328 | submitting code to run the check yourself:: |
308 | 329 | |
309 | 330 | black geopandas |
310 | git diff upstream/master -u -- "*.py" | flake8 --diff | |
331 | git diff upstream/main -u -- "*.py" | flake8 --diff | |
311 | 332 | |
312 | 333 | to auto-format your code. Additionally, many editors have plugins that will |
313 | 334 | apply ``black`` as you edit files. |
314 | 335 | |
315 | 336 | Optionally (but recommended), you can setup `pre-commit hooks <https://pre-commit.com/>`_ |
316 | to automatically run ``black`` and ``flake8`` when you make a git commit. This | |
317 | can be done by installing ``pre-commit``:: | |
337 | to automatically run ``black`` and ``flake8`` when you make a git commit. If you did not | |
338 | use the provided development environment in ``environment-dev.yml``, you must first install ``pre-commit``:: | |
318 | 339 | |
319 | 340 | $ python -m pip install pre-commit |
320 | 341 |
85 | 85 | |
86 | 86 | # General information about the project. |
87 | 87 | project = u"GeoPandas" |
88 | copyright = u"2013–2021, GeoPandas developers" | |
88 | copyright = u"2013–2022, GeoPandas developers" | |
89 | 89 | |
90 | 90 | # The version info for the project you're documenting, acts as replacement for |
91 | 91 | # |version| and |release|, also used in various other places throughout the |
92 | 92 | # built documents. |
93 | 93 | import geopandas |
94 | 94 | |
95 | version = release = geopandas.__version__ | |
95 | release = release = geopandas.__version__ | |
96 | version = release | |
97 | if "+" in version: | |
98 | version, remainder = release.split("+") | |
99 | if not remainder.startswith("0"): | |
100 | version = version + ".dev+" + remainder.split(".")[0] | |
96 | 101 | |
97 | 102 | # The language for content autogenerated by Sphinx. Refer to documentation |
98 | 103 | # for a list of supported languages. |
325 | 330 | .. note:: |
326 | 331 | |
327 | 332 | | This page was generated from `{{ docname }}`__. |
328 | | Interactive online version: :raw-html:`<a href="https://mybinder.org/v2/gh/geopandas/geopandas/master?urlpath=lab/tree/doc/source/{{ docname }}"><img alt="Binder badge" src="https://mybinder.org/badge_logo.svg" style="vertical-align:text-bottom"></a>` | |
329 | ||
330 | __ https://github.com/geopandas/geopandas/blob/master/doc/source/{{ docname }} | |
333 | | Interactive online version: :raw-html:`<a href="https://mybinder.org/v2/gh/geopandas/geopandas/main?urlpath=lab/tree/doc/source/{{ docname }}"><img alt="Binder badge" src="https://mybinder.org/badge_logo.svg" style="vertical-align:text-bottom"></a>` | |
334 | ||
335 | __ https://github.com/geopandas/geopandas/blob/main/doc/source/{{ docname }} | |
331 | 336 | """ |
332 | 337 | |
333 | 338 | # --Options for sphinx extensions ----------------------------------------------- |
68 | 68 | .. autosummary:: |
69 | 69 | :toctree: api/ |
70 | 70 | |
71 | GeoSeries.clip_by_rect | |
71 | 72 | GeoSeries.difference |
72 | 73 | GeoSeries.intersection |
73 | 74 | GeoSeries.symmetric_difference |
74 | 74 | * 'sum' |
75 | 75 | * 'mean' |
76 | 76 | * 'median' |
77 | * function | |
78 | * string function name | |
79 | * list of functions and/or function names, e.g. [np.sum, 'mean'] | |
80 | * dict of axis labels -> functions, function names or list of such. | |
81 | ||
82 | For example, to get the number of countries on each continent, | |
83 | as well as the populations of the largest and smallest country of each, | |
84 | we can aggregate the ``'name'`` column using ``'count'``, | |
85 | and the ``'pop_est'`` column using ``'min'`` and ``'max'``: | |
86 | ||
87 | .. ipython:: python | |
88 | ||
89 | world = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres")) | |
90 | continents = world.dissolve( | |
91 | by="continent", | |
92 | aggfunc={ | |
93 | "name": "count", | |
94 | "pop_est": ["min", "max"], | |
95 | }, | |
96 | ) | |
97 | ||
98 | continents.head()⏎ |
59 | 59 | |
60 | 60 | .. method:: GeoSeries.scale(self, xfact=1.0, yfact=1.0, zfact=1.0, origin='center') |
61 | 61 | |
62 | Scale the geometries of the :class:`~geopandas.GeoSeries` along each (x, y, z) dimensio. | |
62 | Scale the geometries of the :class:`~geopandas.GeoSeries` along each (x, y, z) dimension. | |
63 | 63 | |
64 | 64 | .. method:: GeoSeries.skew(self, angle, origin='center', use_radians=False) |
65 | 65 |
1 | 1 | "cells": [ |
2 | 2 | { |
3 | 3 | "cell_type": "markdown", |
4 | "id": "c554e753", | |
5 | "metadata": {}, | |
4 | 6 | "source": [ |
5 | 7 | "# Interactive mapping\n", |
6 | 8 | "\n", |
9 | 11 | "Creating maps for interactive exploration mirrors the API of [static plots](../reference/api/geopandas.GeoDataFrame.plot.html) in an [explore()](../reference/api/geopandas.GeoDataFrame.explore.html) method of a GeoSeries or GeoDataFrame.\n", |
10 | 12 | "\n", |
11 | 13 | "Loading some example data:" |
12 | ], | |
13 | "metadata": {} | |
14 | ] | |
14 | 15 | }, |
15 | 16 | { |
16 | 17 | "cell_type": "code", |
17 | 18 | "execution_count": null, |
19 | "id": "caf2fbd5", | |
20 | "metadata": {}, | |
21 | "outputs": [], | |
18 | 22 | "source": [ |
19 | 23 | "import geopandas\n", |
20 | 24 | "\n", |
21 | 25 | "nybb = geopandas.read_file(geopandas.datasets.get_path('nybb'))\n", |
22 | 26 | "world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))\n", |
23 | 27 | "cities = geopandas.read_file(geopandas.datasets.get_path('naturalearth_cities'))" |
24 | ], | |
25 | "outputs": [], | |
26 | "metadata": {} | |
28 | ] | |
27 | 29 | }, |
28 | 30 | { |
29 | 31 | "cell_type": "markdown", |
32 | "id": "56bf1bcf", | |
33 | "metadata": {}, | |
30 | 34 | "source": [ |
31 | 35 | "The simplest option is to use `GeoDataFrame.explore()`:" |
32 | ], | |
33 | "metadata": {} | |
36 | ] | |
34 | 37 | }, |
35 | 38 | { |
36 | 39 | "cell_type": "code", |
37 | 40 | "execution_count": null, |
41 | "id": "6b484ecc", | |
42 | "metadata": {}, | |
43 | "outputs": [], | |
38 | 44 | "source": [ |
39 | 45 | "nybb.explore()" |
40 | ], | |
41 | "outputs": [], | |
42 | "metadata": {} | |
46 | ] | |
43 | 47 | }, |
44 | 48 | { |
45 | 49 | "cell_type": "markdown", |
50 | "id": "7a797389", | |
51 | "metadata": {}, | |
46 | 52 | "source": [ |
47 | 53 | "Interactive plotting offers largely the same customisation as static one plus some features on top of that. Check the code below which plots a customised choropleth map. You can use `\"BoroName\"` column with NY boroughs names as an input of the choropleth, show (only) its name in the tooltip on hover but show all values on click. You can also pass custom background tiles (either a name supported by folium, a name recognized by `xyzservices.providers.query_name()`, XYZ URL or `xyzservices.TileProvider` object), specify colormap (all supported by `matplotlib`) and specify black outline." |
48 | ], | |
49 | "metadata": {} | |
54 | ] | |
55 | }, | |
56 | { | |
57 | "cell_type": "markdown", | |
58 | "id": "798bf532", | |
59 | "metadata": {}, | |
60 | "source": [ | |
61 | "<div class=\"alert alert-info\">\n", | |
62 | "Note\n", | |
63 | "\n", | |
64 | "Note that the GeoDataFrame needs to have a CRS set if you want to use background tiles.\n", | |
65 | "</div>" | |
66 | ] | |
50 | 67 | }, |
51 | 68 | { |
52 | 69 | "cell_type": "code", |
53 | 70 | "execution_count": null, |
71 | "id": "94b4ff24", | |
72 | "metadata": {}, | |
73 | "outputs": [], | |
54 | 74 | "source": [ |
55 | 75 | "nybb.explore( \n", |
56 | 76 | " column=\"BoroName\", # make choropleth based on \"BoroName\" column\n", |
60 | 80 | " cmap=\"Set1\", # use \"Set1\" matplotlib colormap\n", |
61 | 81 | " style_kwds=dict(color=\"black\") # use black outline\n", |
62 | 82 | " )" |
63 | ], | |
64 | "outputs": [], | |
65 | "metadata": {} | |
83 | ] | |
66 | 84 | }, |
67 | 85 | { |
68 | 86 | "cell_type": "markdown", |
87 | "id": "5a10291e", | |
88 | "metadata": {}, | |
69 | 89 | "source": [ |
70 | 90 | "The `explore()` method returns a `folium.Map` object, which can also be passed directly (as you do with `ax` in `plot()`). You can then use folium functionality directly on the resulting map. In the example below, you can plot two GeoDataFrames on the same map and add layer control using folium. You can also add additional tiles allowing you to change the background directly in the map." |
71 | ], | |
72 | "metadata": {} | |
91 | ] | |
73 | 92 | }, |
74 | 93 | { |
75 | 94 | "cell_type": "code", |
76 | 95 | "execution_count": null, |
96 | "id": "cba9970b", | |
97 | "metadata": {}, | |
98 | "outputs": [], | |
77 | 99 | "source": [ |
78 | 100 | "import folium\n", |
79 | 101 | "\n", |
99 | 121 | "folium.LayerControl().add_to(m) # use folium to add layer control\n", |
100 | 122 | "\n", |
101 | 123 | "m # show map" |
102 | ], | |
103 | "outputs": [], | |
104 | "metadata": {} | |
124 | ] | |
105 | 125 | } |
106 | 126 | ], |
107 | 127 | "metadata": { |
125 | 145 | }, |
126 | 146 | "nbformat": 4, |
127 | 147 | "nbformat_minor": 5 |
128 | }⏎ | |
148 | } |
28 | 28 | the ``layer`` keyword:: |
29 | 29 | |
30 | 30 | countries_gdf = geopandas.read_file("package.gpkg", layer='countries') |
31 | ||
31 | ||
32 | Currently fiona only exposes the default drivers. To display those, type:: | |
33 | ||
34 | import fiona; fiona.supported_drivers | |
35 | ||
36 | There is an `array <https://github.com/Toblerity/Fiona/blob/master/fiona/drvsupport.py>`_ | |
37 | of unexposed but supported (depending on the GDAL-build) drivers. One can activate | |
38 | these on runtime by updating the `supported_drivers` dictionary like:: | |
39 | ||
40 | fiona.supported_drivers["NAS"] = "raw" | |
41 | ||
32 | 42 | Where supported in :mod:`fiona`, *geopandas* can also load resources directly from |
33 | 43 | a web URL, for example for GeoJSON files from `geojson.xyz <http://geojson.xyz/>`_:: |
34 | 44 | |
134 | 144 | ^^^^^^^^^^^^^^^^^^^^ |
135 | 145 | |
136 | 146 | Load in a subset of fields from the file: |
147 | ||
148 | .. note:: Requires Fiona 1.9+ | |
149 | ||
150 | .. code-block:: python | |
151 | ||
152 | gdf = geopandas.read_file( | |
153 | geopandas.datasets.get_path("naturalearth_lowres"), | |
154 | include_fields=["pop_est", "continent", "name"], | |
155 | ) | |
137 | 156 | |
138 | 157 | .. note:: Requires Fiona 1.8+ |
139 | 158 |
108 | 108 | columns of the same GeoDataFrame. The projection is now stored together with geometries per column (directly |
109 | 109 | on the GeometryArray level). |
110 | 110 | |
111 | Note that if GeometryArray has assigned projection, it is preferred over the | |
112 | projection passed to GeoSeries or GeoDataFrame during the creation: | |
111 | Note that if GeometryArray has an assigned projection, it cannot be overridden by an another inconsistent | |
112 | projection during the creation of a GeoSeries or GeoDataFrame: | |
113 | 113 | |
114 | 114 | .. code-block:: python |
115 | 115 | |
120 | 120 | - Lat[north]: Geodetic latitude (degree) |
121 | 121 | - Lon[east]: Geodetic longitude (degree) |
122 | 122 | ... |
123 | >>> GeoSeries(array, crs=3395).crs # crs=3395 is ignored as array already has CRS | |
124 | FutureWarning: CRS mismatch between CRS of the passed geometries and 'crs'. Use 'GeoDataFrame.set_crs(crs, allow_override=True)' to overwrite CRS or 'GeoDataFrame.to_crs(crs)' to reproject geometries. CRS mismatch will raise an error in the future versions of GeoPandas. | |
123 | >>> GeoSeries(array, crs=4326) # crs=4326 is okay, as it matches the existing CRS | |
124 | >>> GeoSeries(array, crs=3395) # crs=3395 is forbidden as array already has CRS | |
125 | ValueError: CRS mismatch between CRS of the passed geometries and 'crs'. Use 'GeoSeries.set_crs(crs, allow_override=True)' to overwrite CRS or 'GeoSeries.to_crs(crs)' to reproject geometries. | |
125 | 126 | GeoSeries(array, crs=3395).crs |
126 | 127 | |
127 | <Geographic 2D CRS: EPSG:4326> | |
128 | Name: WGS 84 | |
129 | Axis Info [ellipsoidal]: | |
130 | - Lat[north]: Geodetic latitude (degree) | |
131 | - Lon[east]: Geodetic longitude (degree) | |
132 | ... | |
133 | ||
134 | If you want to overwrite projection, you can then assign it to the GeoSeries | |
128 | If you want to overwrite the projection, you can then assign it to the GeoSeries | |
135 | 129 | manually or re-project geometries to the target projection using either |
136 | 130 | ``GeoSeries.set_crs(epsg=3395, allow_override=True)`` or |
137 | 131 | ``GeoSeries.to_crs(epsg=3395)``. |
211 | 211 | More Examples |
212 | 212 | ------------- |
213 | 213 | |
214 | A larger set of examples of the use of :meth:`~geopandas.GeoDataFrame.overlay` can be found `here <https://nbviewer.jupyter.org/github/geopandas/geopandas/blob/master/doc/source/gallery/overlays.ipynb>`_ | |
214 | A larger set of examples of the use of :meth:`~geopandas.GeoDataFrame.overlay` can be found `here <https://nbviewer.jupyter.org/github/geopandas/geopandas/blob/main/doc/source/gallery/overlays.ipynb>`_ | |
215 | 215 | |
216 | 216 | |
217 | 217 |
1 | 1 | "cells": [ |
2 | 2 | { |
3 | 3 | "cell_type": "markdown", |
4 | "metadata": {}, | |
4 | 5 | "source": [ |
5 | 6 | "# Spatial Joins\n", |
6 | 7 | "\n", |
11 | 12 | "A common use case might be a spatial join between a point layer and a polygon layer where you want to retain the point geometries and grab the attributes of the intersecting polygons.\n", |
12 | 13 | "\n", |
13 | 14 | "![illustration](https://web.natur.cuni.cz/~langhamr/lectures/vtfg1/mapinfo_1/about_gis/Image23.gif)" |
14 | ], | |
15 | "metadata": {} | |
16 | }, | |
17 | { | |
18 | "cell_type": "markdown", | |
15 | ] | |
16 | }, | |
17 | { | |
18 | "cell_type": "markdown", | |
19 | "metadata": {}, | |
19 | 20 | "source": [ |
20 | 21 | "\n", |
21 | 22 | "## Types of spatial joins\n", |
83 | 84 | " 0101000000F0D88AA0E1A4EEBF7052F7E5B115E9BF | 2 | 20\n", |
84 | 85 | "(4 rows) \n", |
85 | 86 | "```" |
86 | ], | |
87 | "metadata": {} | |
88 | }, | |
89 | { | |
90 | "cell_type": "markdown", | |
87 | ] | |
88 | }, | |
89 | { | |
90 | "cell_type": "markdown", | |
91 | "metadata": {}, | |
91 | 92 | "source": [ |
92 | 93 | "## Spatial Joins between two GeoDataFrames\n", |
93 | 94 | "\n", |
94 | 95 | "Let's take a look at how we'd implement these using `GeoPandas`. First, load up the NYC test data into `GeoDataFrames`:" |
95 | ], | |
96 | "metadata": {} | |
97 | }, | |
98 | { | |
99 | "cell_type": "code", | |
100 | "execution_count": null, | |
96 | ] | |
97 | }, | |
98 | { | |
99 | "cell_type": "code", | |
100 | "execution_count": null, | |
101 | "metadata": {}, | |
102 | "outputs": [], | |
101 | 103 | "source": [ |
102 | 104 | "%matplotlib inline\n", |
103 | 105 | "from shapely.geometry import Point\n", |
117 | 119 | "\n", |
118 | 120 | "# Make sure they're using the same projection reference\n", |
119 | 121 | "pointdf.crs = polydf.crs" |
120 | ], | |
121 | "outputs": [], | |
122 | "metadata": {} | |
123 | }, | |
124 | { | |
125 | "cell_type": "code", | |
126 | "execution_count": null, | |
122 | ] | |
123 | }, | |
124 | { | |
125 | "cell_type": "code", | |
126 | "execution_count": null, | |
127 | "metadata": {}, | |
128 | "outputs": [], | |
127 | 129 | "source": [ |
128 | 130 | "pointdf" |
129 | ], | |
130 | "outputs": [], | |
131 | "metadata": {} | |
132 | }, | |
133 | { | |
134 | "cell_type": "code", | |
135 | "execution_count": null, | |
131 | ] | |
132 | }, | |
133 | { | |
134 | "cell_type": "code", | |
135 | "execution_count": null, | |
136 | "metadata": {}, | |
137 | "outputs": [], | |
136 | 138 | "source": [ |
137 | 139 | "polydf" |
138 | ], | |
139 | "outputs": [], | |
140 | "metadata": {} | |
141 | }, | |
142 | { | |
143 | "cell_type": "code", | |
144 | "execution_count": null, | |
140 | ] | |
141 | }, | |
142 | { | |
143 | "cell_type": "code", | |
144 | "execution_count": null, | |
145 | "metadata": {}, | |
146 | "outputs": [], | |
145 | 147 | "source": [ |
146 | 148 | "pointdf.plot()" |
147 | ], | |
148 | "outputs": [], | |
149 | "metadata": {} | |
150 | }, | |
151 | { | |
152 | "cell_type": "code", | |
153 | "execution_count": null, | |
149 | ] | |
150 | }, | |
151 | { | |
152 | "cell_type": "code", | |
153 | "execution_count": null, | |
154 | "metadata": {}, | |
155 | "outputs": [], | |
154 | 156 | "source": [ |
155 | 157 | "polydf.plot()" |
156 | ], | |
157 | "outputs": [], | |
158 | "metadata": {} | |
159 | }, | |
160 | { | |
161 | "cell_type": "markdown", | |
158 | ] | |
159 | }, | |
160 | { | |
161 | "cell_type": "markdown", | |
162 | "metadata": {}, | |
162 | 163 | "source": [ |
163 | 164 | "## Joins" |
164 | ], | |
165 | "metadata": {} | |
166 | }, | |
167 | { | |
168 | "cell_type": "code", | |
169 | "execution_count": null, | |
165 | ] | |
166 | }, | |
167 | { | |
168 | "cell_type": "code", | |
169 | "execution_count": null, | |
170 | "metadata": {}, | |
171 | "outputs": [], | |
170 | 172 | "source": [ |
171 | 173 | "join_left_df = pointdf.sjoin(polydf, how=\"left\")\n", |
172 | 174 | "join_left_df\n", |
173 | 175 | "# Note the NaNs where the point did not intersect a boro" |
174 | ], | |
175 | "outputs": [], | |
176 | "metadata": {} | |
177 | }, | |
178 | { | |
179 | "cell_type": "code", | |
180 | "execution_count": null, | |
176 | ] | |
177 | }, | |
178 | { | |
179 | "cell_type": "code", | |
180 | "execution_count": null, | |
181 | "metadata": {}, | |
182 | "outputs": [], | |
181 | 183 | "source": [ |
182 | 184 | "join_right_df = pointdf.sjoin(polydf, how=\"right\")\n", |
183 | 185 | "join_right_df\n", |
184 | 186 | "# Note Staten Island is repeated" |
185 | ], | |
186 | "outputs": [], | |
187 | "metadata": {} | |
188 | }, | |
189 | { | |
190 | "cell_type": "code", | |
191 | "execution_count": null, | |
187 | ] | |
188 | }, | |
189 | { | |
190 | "cell_type": "code", | |
191 | "execution_count": null, | |
192 | "metadata": {}, | |
193 | "outputs": [], | |
192 | 194 | "source": [ |
193 | 195 | "join_inner_df = pointdf.sjoin(polydf, how=\"inner\")\n", |
194 | 196 | "join_inner_df\n", |
195 | 197 | "# Note the lack of NaNs; dropped anything that didn't intersect" |
196 | ], | |
197 | "outputs": [], | |
198 | "metadata": {} | |
199 | }, | |
200 | { | |
201 | "cell_type": "markdown", | |
198 | ] | |
199 | }, | |
200 | { | |
201 | "cell_type": "markdown", | |
202 | "metadata": {}, | |
202 | 203 | "source": [ |
203 | 204 | "We're not limited to using the `intersection` binary predicate. Any of the `Shapely` geometry methods that return a Boolean can be used by specifying the `op` kwarg." |
204 | ], | |
205 | "metadata": {} | |
206 | }, | |
207 | { | |
208 | "cell_type": "code", | |
209 | "execution_count": null, | |
210 | "source": [ | |
211 | "pointdf.sjoin(polydf, how=\"left\", op=\"within\")" | |
212 | ], | |
213 | "outputs": [], | |
214 | "metadata": {} | |
205 | ] | |
206 | }, | |
207 | { | |
208 | "cell_type": "code", | |
209 | "execution_count": null, | |
210 | "metadata": {}, | |
211 | "outputs": [], | |
212 | "source": [ | |
213 | "pointdf.sjoin(polydf, how=\"left\", predicate=\"within\")" | |
214 | ] | |
215 | }, | |
216 | { | |
217 | "cell_type": "markdown", | |
218 | "metadata": {}, | |
219 | "source": [ | |
220 | "We can also conduct a nearest neighbour join with `sjoin_nearest`." | |
221 | ] | |
222 | }, | |
223 | { | |
224 | "cell_type": "code", | |
225 | "execution_count": null, | |
226 | "metadata": {}, | |
227 | "outputs": [], | |
228 | "source": [ | |
229 | "pointdf.sjoin_nearest(polydf, how=\"left\", distance_col=\"Distances\")\n", | |
230 | "# Note the optional Distances column with computed distances between each point\n", | |
231 | "# and the nearest polydf geometry." | |
232 | ] | |
215 | 233 | } |
216 | 234 | ], |
217 | 235 | "metadata": { |
235 | 253 | }, |
236 | 254 | "nbformat": 4, |
237 | 255 | "nbformat_minor": 4 |
238 | }⏎ | |
256 | } |
92 | 92 | installed correctly. |
93 | 93 | |
94 | 94 | - `fiona`_ provides binary wheels with the dependencies included for Mac and Linux, |
95 | but not for Windows. | |
95 | but not for Windows. Alternatively, you can install `pyogrio`_ which does | |
96 | have wheels for Windows. | |
96 | 97 | - `pyproj`_, `rtree`_, and `shapely`_ provide binary wheels with dependencies included |
97 | 98 | for Mac, Linux, and Windows. |
98 | - Windows wheels for `shapely`, `fiona`, `pyproj` and `rtree` | |
99 | can be found at `Christopher Gohlke's website | |
100 | <https://www.lfd.uci.edu/~gohlke/pythonlibs/>`_. | |
101 | 99 | |
102 | 100 | Depending on your platform, you might need to compile and install their |
103 | 101 | C dependencies manually. We refer to the individual packages for more |
137 | 135 | Required dependencies: |
138 | 136 | |
139 | 137 | - `numpy`_ |
140 | - `pandas`_ (version 0.25 or later) | |
141 | - `shapely`_ (interface to `GEOS`_) | |
142 | - `fiona`_ (interface to `GDAL`_) | |
143 | - `pyproj`_ (interface to `PROJ`_; version 2.2.0 or later) | |
138 | - `pandas`_ (version 1.0 or later) | |
139 | - `shapely`_ (interface to `GEOS`_; version 1.7 or later) | |
140 | - `fiona`_ (interface to `GDAL`_; version 1.8 or later) | |
141 | - `pyproj`_ (interface to `PROJ`_; version 2.6.1 or later) | |
142 | - `packaging`_ | |
144 | 143 | |
145 | 144 | Further, optional dependencies are: |
146 | 145 | |
146 | - `pyogrio`_ (optional; experimental alternative for fiona) | |
147 | 147 | - `rtree`_ (optional; spatial index to improve performance and required for |
148 | 148 | overlay operations; interface to `libspatialindex`_) |
149 | 149 | - `psycopg2`_ (optional; for PostGIS connection) |
153 | 153 | |
154 | 154 | For plotting, these additional packages may be used: |
155 | 155 | |
156 | - `matplotlib`_ (>= 3.1.0) | |
156 | - `matplotlib`_ (>= 3.2.0) | |
157 | 157 | - `mapclassify`_ (>= 2.4.0) |
158 | 158 | |
159 | 159 | |
210 | 210 | |
211 | 211 | .. _fiona: https://fiona.readthedocs.io |
212 | 212 | |
213 | .. _pyogrio: https://pyogrio.readthedocs.io | |
214 | ||
213 | 215 | .. _matplotlib: http://matplotlib.org |
214 | 216 | |
215 | 217 | .. _geopy: https://github.com/geopy/geopy |
241 | 243 | .. _PROJ: https://proj.org/ |
242 | 244 | |
243 | 245 | .. _PyGEOS: https://github.com/pygeos/pygeos/ |
246 | ||
247 | .. _packaging: https://packaging.pypa.io/en/latest/⏎ |
1 | 1 | channels: |
2 | 2 | - conda-forge |
3 | 3 | dependencies: |
4 | - python | |
4 | 5 | # required |
5 | 6 | - fiona>=1.8 |
6 | - pandas>=0.25 | |
7 | - pyproj>=2.2.0 | |
8 | - shapely>=1.6 | |
9 | ||
10 | # geodatabase access | |
11 | - psycopg2>=2.5.1 | |
12 | - SQLAlchemy>=0.8.3 | |
13 | ||
14 | # geocoding | |
15 | - geopy | |
16 | ||
17 | # plotting | |
18 | - matplotlib>=2.2 | |
19 | - mapclassify | |
7 | - pandas>=1.0.0 | |
8 | - pygeos | |
9 | - pyproj>=2.6.1.post1 | |
10 | - shapely>=1.7 | |
11 | - packaging | |
20 | 12 | |
21 | 13 | # testing |
22 | 14 | - pytest>=3.1.0 |
23 | 15 | - pytest-cov |
16 | - pytest-xdist | |
17 | - fsspec | |
24 | 18 | - codecov |
25 | ||
26 | # spatial access methods | |
27 | - rtree>=0.8 | |
28 | ||
29 | 19 | # styling |
30 | 20 | - black |
31 | 21 | - pre-commit |
22 | ||
23 | # optional | |
24 | - folium | |
25 | - xyzservices | |
26 | - scipy | |
27 | - libspatialite | |
28 | - geoalchemy2 | |
29 | - pyarrow | |
30 | # doctest testing | |
31 | - pytest-doctestplus | |
32 | # geocoding | |
33 | - geopy | |
34 | # geodatabase access | |
35 | - psycopg2>=2.8.0 | |
36 | - SQLAlchemy>=1.3 | |
37 | # plotting | |
38 | - matplotlib>=3.2 | |
39 | - mapclassify | |
40 | # spatial access methods | |
41 | - rtree>=0.9 |
0 | 0 | # Examples Gallery |
1 | 1 | |
2 | Examples are available in the [documentation](https://geopandas.readthedocs.io/en/latest/gallery/index.html). Source Jupyter notebooks are in [`doc/source/gallery`](https://github.com/geopandas/geopandas/tree/master/doc/source/gallery). | |
2 | Examples are available in the [documentation](https://geopandas.readthedocs.io/en/latest/gallery/index.html). Source Jupyter notebooks are in [`doc/source/gallery`](https://github.com/geopandas/geopandas/tree/main/doc/source/gallery). |
22 | 22 | import pandas as pd # noqa |
23 | 23 | import numpy as np # noqa |
24 | 24 | |
25 | from ._version import get_versions | |
25 | from . import _version | |
26 | 26 | |
27 | __version__ = get_versions()["version"] | |
28 | del get_versions | |
27 | __version__ = _version.get_versions()["version"] |
0 | 0 | import contextlib |
1 | from distutils.version import LooseVersion | |
1 | from packaging.version import Version | |
2 | 2 | import importlib |
3 | 3 | import os |
4 | 4 | import warnings |
14 | 14 | # pandas compat |
15 | 15 | # ----------------------------------------------------------------------------- |
16 | 16 | |
17 | PANDAS_GE_10 = str(pd.__version__) >= LooseVersion("1.0.0") | |
18 | PANDAS_GE_11 = str(pd.__version__) >= LooseVersion("1.1.0") | |
19 | PANDAS_GE_115 = str(pd.__version__) >= LooseVersion("1.1.5") | |
20 | PANDAS_GE_12 = str(pd.__version__) >= LooseVersion("1.2.0") | |
17 | PANDAS_GE_11 = Version(pd.__version__) >= Version("1.1.0") | |
18 | PANDAS_GE_115 = Version(pd.__version__) >= Version("1.1.5") | |
19 | PANDAS_GE_12 = Version(pd.__version__) >= Version("1.2.0") | |
20 | PANDAS_GE_13 = Version(pd.__version__) >= Version("1.3.0") | |
21 | PANDAS_GE_14 = Version(pd.__version__) >= Version("1.4.0rc0") | |
21 | 22 | |
22 | 23 | |
23 | 24 | # ----------------------------------------------------------------------------- |
25 | 26 | # ----------------------------------------------------------------------------- |
26 | 27 | |
27 | 28 | |
28 | SHAPELY_GE_17 = str(shapely.__version__) >= LooseVersion("1.7.0") | |
29 | SHAPELY_GE_18 = str(shapely.__version__) >= LooseVersion("1.8") | |
30 | SHAPELY_GE_20 = str(shapely.__version__) >= LooseVersion("2.0") | |
29 | SHAPELY_GE_18 = Version(shapely.__version__) >= Version("1.8") | |
30 | SHAPELY_GE_182 = Version(shapely.__version__) >= Version("1.8.2") | |
31 | SHAPELY_GE_20 = Version(shapely.__version__) >= Version("2.0") | |
31 | 32 | |
32 | 33 | GEOS_GE_390 = shapely.geos.geos_version >= (3, 9, 0) |
33 | 34 | |
46 | 47 | import pygeos # noqa |
47 | 48 | |
48 | 49 | # only automatically use pygeos if version is high enough |
49 | if str(pygeos.__version__) >= LooseVersion("0.8"): | |
50 | if Version(pygeos.__version__) >= Version("0.8"): | |
50 | 51 | HAS_PYGEOS = True |
51 | PYGEOS_GE_09 = str(pygeos.__version__) >= LooseVersion("0.9") | |
52 | PYGEOS_GE_010 = str(pygeos.__version__) >= LooseVersion("0.10") | |
52 | PYGEOS_GE_09 = Version(pygeos.__version__) >= Version("0.9") | |
53 | PYGEOS_GE_010 = Version(pygeos.__version__) >= Version("0.10") | |
53 | 54 | else: |
54 | 55 | warnings.warn( |
55 | 56 | "The installed version of PyGEOS is too old ({0} installed, 0.8 required)," |
91 | 92 | import pygeos # noqa |
92 | 93 | |
93 | 94 | # validate the pygeos version |
94 | if not str(pygeos.__version__) >= LooseVersion("0.8"): | |
95 | if not Version(pygeos.__version__) >= Version("0.8"): | |
95 | 96 | raise ImportError( |
96 | 97 | "PyGEOS >= 0.8 is required, version {0} is installed".format( |
97 | 98 | pygeos.__version__ |
148 | 149 | ) |
149 | 150 | yield |
150 | 151 | |
151 | ||
152 | elif (str(np.__version__) >= LooseVersion("1.21")) and not SHAPELY_GE_20: | |
152 | elif (Version(np.__version__) >= Version("1.21")) and not SHAPELY_GE_20: | |
153 | 153 | |
154 | 154 | @contextlib.contextmanager |
155 | 155 | def ignore_shapely2_warnings(): |
161 | 161 | ) |
162 | 162 | yield |
163 | 163 | |
164 | ||
165 | 164 | else: |
166 | 165 | |
167 | 166 | @contextlib.contextmanager |
219 | 218 | except ImportError: |
220 | 219 | HAS_RTREE = False |
221 | 220 | |
221 | ||
222 | 222 | # ----------------------------------------------------------------------------- |
223 | 223 | # pyproj compat |
224 | 224 | # ----------------------------------------------------------------------------- |
225 | 225 | |
226 | PYPROJ_LT_3 = LooseVersion(pyproj.__version__) < LooseVersion("3") | |
227 | PYPROJ_GE_31 = LooseVersion(pyproj.__version__) >= LooseVersion("3.1") | |
226 | PYPROJ_LT_3 = Version(pyproj.__version__) < Version("3") | |
227 | PYPROJ_GE_31 = Version(pyproj.__version__) >= Version("3.1") | |
228 | PYPROJ_GE_32 = Version(pyproj.__version__) >= Version("3.2") |
49 | 49 | cls = self.__class__.__name__ |
50 | 50 | description = "" |
51 | 51 | for key, option in self._options.items(): |
52 | descr = u"{key}: {cur!r} [default: {default!r}]\n".format( | |
52 | descr = "{key}: {cur!r} [default: {default!r}]\n".format( | |
53 | 53 | key=key, cur=self._config[key], default=option.default_value |
54 | 54 | ) |
55 | 55 | description += descr |
57 | 57 | if option.doc: |
58 | 58 | doc_text = "\n".join(textwrap.wrap(option.doc, width=70)) |
59 | 59 | else: |
60 | doc_text = u"No description available." | |
60 | doc_text = "No description available." | |
61 | 61 | doc_text = textwrap.indent(doc_text, prefix=" ") |
62 | 62 | description += doc_text + "\n" |
63 | 63 | space = "\n " |
10 | 10 | |
11 | 11 | import shapely.geometry |
12 | 12 | import shapely.geos |
13 | import shapely.ops | |
13 | 14 | import shapely.wkb |
14 | 15 | import shapely.wkt |
15 | 16 | |
55 | 56 | return True |
56 | 57 | elif isinstance(value, float) and np.isnan(value): |
57 | 58 | return True |
58 | elif compat.PANDAS_GE_10 and value is pd.NA: | |
59 | elif value is pd.NA: | |
59 | 60 | return True |
60 | 61 | else: |
61 | 62 | return False |
727 | 728 | # |
728 | 729 | |
729 | 730 | |
731 | def clip_by_rect(data, xmin, ymin, xmax, ymax): | |
732 | if compat.USE_PYGEOS: | |
733 | return pygeos.clip_by_rect(data, xmin, ymin, xmax, ymax) | |
734 | else: | |
735 | clipped_geometries = np.empty(len(data), dtype=object) | |
736 | clipped_geometries[:] = [ | |
737 | shapely.ops.clip_by_rect(s, xmin, ymin, xmax, ymax) | |
738 | if s is not None | |
739 | else None | |
740 | for s in data | |
741 | ] | |
742 | return clipped_geometries | |
743 | ||
744 | ||
730 | 745 | def difference(data, other): |
731 | 746 | if compat.USE_PYGEOS: |
732 | 747 | return _binary_method("difference", data, other) |
4 | 4 | # that just contains the computed version number. |
5 | 5 | |
6 | 6 | # This file is released into the public domain. Generated by |
7 | # versioneer-0.16 (https://github.com/warner/python-versioneer) | |
7 | # versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) | |
8 | 8 | |
9 | 9 | """Git implementation of _version.py.""" |
10 | 10 | |
13 | 13 | import re |
14 | 14 | import subprocess |
15 | 15 | import sys |
16 | from typing import Callable, Dict | |
16 | 17 | |
17 | 18 | |
18 | 19 | def get_keywords(): |
21 | 22 | # setup.py/versioneer.py will grep for the variable names, so they must |
22 | 23 | # each be defined on a line of their own. _version.py will just call |
23 | 24 | # get_keywords(). |
24 | git_refnames = " (HEAD -> master, tag: v0.10.2)" | |
25 | git_full = "04d377f321972801888381356cb6259766eb63b6" | |
26 | keywords = {"refnames": git_refnames, "full": git_full} | |
25 | git_refnames = " (HEAD -> main, tag: v0.11.0)" | |
26 | git_full = "1977b5036b9ca3a034e65ea1f5ba48b7225550a7" | |
27 | git_date = "2022-06-21 08:00:39 +0200" | |
28 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} | |
27 | 29 | return keywords |
28 | 30 | |
29 | 31 | |
49 | 51 | """Exception raised if a method is not valid for the current scenario.""" |
50 | 52 | |
51 | 53 | |
52 | LONG_VERSION_PY = {} | |
53 | HANDLERS = {} | |
54 | LONG_VERSION_PY: Dict[str, str] = {} | |
55 | HANDLERS: Dict[str, Dict[str, Callable]] = {} | |
54 | 56 | |
55 | 57 | |
56 | 58 | def register_vcs_handler(vcs, method): # decorator |
57 | """Decorator to mark a method as the handler for a particular VCS.""" | |
59 | """Create decorator to mark a method as the handler of a VCS.""" | |
58 | 60 | |
59 | 61 | def decorate(f): |
60 | 62 | """Store f in HANDLERS[vcs][method].""" |
66 | 68 | return decorate |
67 | 69 | |
68 | 70 | |
69 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): | |
71 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): | |
70 | 72 | """Call the given command(s).""" |
71 | 73 | assert isinstance(commands, list) |
72 | p = None | |
73 | for c in commands: | |
74 | process = None | |
75 | for command in commands: | |
74 | 76 | try: |
75 | dispcmd = str([c] + args) | |
77 | dispcmd = str([command] + args) | |
76 | 78 | # remember shell=False, so use git.cmd on windows, not just git |
77 | p = subprocess.Popen( | |
78 | [c] + args, | |
79 | process = subprocess.Popen( | |
80 | [command] + args, | |
79 | 81 | cwd=cwd, |
82 | env=env, | |
80 | 83 | stdout=subprocess.PIPE, |
81 | 84 | stderr=(subprocess.PIPE if hide_stderr else None), |
82 | 85 | ) |
83 | 86 | break |
84 | except EnvironmentError: | |
87 | except OSError: | |
85 | 88 | e = sys.exc_info()[1] |
86 | 89 | if e.errno == errno.ENOENT: |
87 | 90 | continue |
88 | 91 | if verbose: |
89 | 92 | print("unable to run %s" % dispcmd) |
90 | 93 | print(e) |
91 | return None | |
94 | return None, None | |
92 | 95 | else: |
93 | 96 | if verbose: |
94 | 97 | print("unable to find command, tried %s" % (commands,)) |
95 | return None | |
96 | stdout = p.communicate()[0].strip() | |
97 | if sys.version_info[0] >= 3: | |
98 | stdout = stdout.decode() | |
99 | if p.returncode != 0: | |
98 | return None, None | |
99 | stdout = process.communicate()[0].strip().decode() | |
100 | if process.returncode != 0: | |
100 | 101 | if verbose: |
101 | 102 | print("unable to run %s (error)" % dispcmd) |
102 | return None | |
103 | return stdout | |
103 | print("stdout was %s" % stdout) | |
104 | return None, process.returncode | |
105 | return stdout, process.returncode | |
104 | 106 | |
105 | 107 | |
106 | 108 | def versions_from_parentdir(parentdir_prefix, root, verbose): |
107 | 109 | """Try to determine the version from the parent directory name. |
108 | 110 | |
109 | Source tarballs conventionally unpack into a directory that includes | |
110 | both the project name and a version string. | |
111 | """ | |
112 | dirname = os.path.basename(root) | |
113 | if not dirname.startswith(parentdir_prefix): | |
114 | if verbose: | |
115 | print( | |
116 | "guessing rootdir is '%s', but '%s' doesn't start with " | |
117 | "prefix '%s'" % (root, dirname, parentdir_prefix) | |
118 | ) | |
119 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") | |
120 | return { | |
121 | "version": dirname[len(parentdir_prefix) :], | |
122 | "full-revisionid": None, | |
123 | "dirty": False, | |
124 | "error": None, | |
125 | } | |
111 | Source tarballs conventionally unpack into a directory that includes both | |
112 | the project name and a version string. We will also support searching up | |
113 | two directory levels for an appropriately named parent directory | |
114 | """ | |
115 | rootdirs = [] | |
116 | ||
117 | for _ in range(3): | |
118 | dirname = os.path.basename(root) | |
119 | if dirname.startswith(parentdir_prefix): | |
120 | return { | |
121 | "version": dirname[len(parentdir_prefix) :], | |
122 | "full-revisionid": None, | |
123 | "dirty": False, | |
124 | "error": None, | |
125 | "date": None, | |
126 | } | |
127 | rootdirs.append(root) | |
128 | root = os.path.dirname(root) # up a level | |
129 | ||
130 | if verbose: | |
131 | print( | |
132 | "Tried directories %s but none started with prefix %s" | |
133 | % (str(rootdirs), parentdir_prefix) | |
134 | ) | |
135 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") | |
126 | 136 | |
127 | 137 | |
128 | 138 | @register_vcs_handler("git", "get_keywords") |
134 | 144 | # _version.py. |
135 | 145 | keywords = {} |
136 | 146 | try: |
137 | f = open(versionfile_abs, "r") | |
138 | for line in f.readlines(): | |
139 | if line.strip().startswith("git_refnames ="): | |
140 | mo = re.search(r'=\s*"(.*)"', line) | |
141 | if mo: | |
142 | keywords["refnames"] = mo.group(1) | |
143 | if line.strip().startswith("git_full ="): | |
144 | mo = re.search(r'=\s*"(.*)"', line) | |
145 | if mo: | |
146 | keywords["full"] = mo.group(1) | |
147 | f.close() | |
148 | except EnvironmentError: | |
147 | with open(versionfile_abs, "r") as fobj: | |
148 | for line in fobj: | |
149 | if line.strip().startswith("git_refnames ="): | |
150 | mo = re.search(r'=\s*"(.*)"', line) | |
151 | if mo: | |
152 | keywords["refnames"] = mo.group(1) | |
153 | if line.strip().startswith("git_full ="): | |
154 | mo = re.search(r'=\s*"(.*)"', line) | |
155 | if mo: | |
156 | keywords["full"] = mo.group(1) | |
157 | if line.strip().startswith("git_date ="): | |
158 | mo = re.search(r'=\s*"(.*)"', line) | |
159 | if mo: | |
160 | keywords["date"] = mo.group(1) | |
161 | except OSError: | |
149 | 162 | pass |
150 | 163 | return keywords |
151 | 164 | |
153 | 166 | @register_vcs_handler("git", "keywords") |
154 | 167 | def git_versions_from_keywords(keywords, tag_prefix, verbose): |
155 | 168 | """Get version information from git keywords.""" |
156 | if not keywords: | |
157 | raise NotThisMethod("no keywords at all, weird") | |
169 | if "refnames" not in keywords: | |
170 | raise NotThisMethod("Short version file found") | |
171 | date = keywords.get("date") | |
172 | if date is not None: | |
173 | # Use only the last line. Previous lines may contain GPG signature | |
174 | # information. | |
175 | date = date.splitlines()[-1] | |
176 | ||
177 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant | |
178 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 | |
179 | # -like" string, which we must then edit to make compliant), because | |
180 | # it's been around since git-1.5.3, and it's too difficult to | |
181 | # discover which version we're using, or to work around using an | |
182 | # older one. | |
183 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) | |
158 | 184 | refnames = keywords["refnames"].strip() |
159 | 185 | if refnames.startswith("$Format"): |
160 | 186 | if verbose: |
161 | 187 | print("keywords are unexpanded, not using") |
162 | 188 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") |
163 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) | |
189 | refs = {r.strip() for r in refnames.strip("()").split(",")} | |
164 | 190 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of |
165 | 191 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. |
166 | 192 | TAG = "tag: " |
167 | tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) | |
193 | tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} | |
168 | 194 | if not tags: |
169 | 195 | # Either we're using git < 1.8.3, or there really are no tags. We use |
170 | 196 | # a heuristic: assume all version tags have a digit. The old git %d |
173 | 199 | # between branches and tags. By ignoring refnames without digits, we |
174 | 200 | # filter out many common branch names like "release" and |
175 | 201 | # "stabilization", as well as "HEAD" and "master". |
176 | tags = set([r for r in refs if re.search(r"\d", r)]) | |
202 | tags = {r for r in refs if re.search(r"\d", r)} | |
177 | 203 | if verbose: |
178 | 204 | print("discarding '%s', no digits" % ",".join(refs - tags)) |
179 | 205 | if verbose: |
182 | 208 | # sorting will prefer e.g. "2.0" over "2.0rc1" |
183 | 209 | if ref.startswith(tag_prefix): |
184 | 210 | r = ref[len(tag_prefix) :] |
211 | # Filter out refs that exactly match prefix or that don't start | |
212 | # with a number once the prefix is stripped (mostly a concern | |
213 | # when prefix is '') | |
214 | if not re.match(r"\d", r): | |
215 | continue | |
185 | 216 | if verbose: |
186 | 217 | print("picking %s" % r) |
187 | 218 | return { |
189 | 220 | "full-revisionid": keywords["full"].strip(), |
190 | 221 | "dirty": False, |
191 | 222 | "error": None, |
223 | "date": date, | |
192 | 224 | } |
193 | 225 | # no suitable tags, so version is "0+unknown", but full hex is still there |
194 | 226 | if verbose: |
198 | 230 | "full-revisionid": keywords["full"].strip(), |
199 | 231 | "dirty": False, |
200 | 232 | "error": "no suitable tags", |
233 | "date": None, | |
201 | 234 | } |
202 | 235 | |
203 | 236 | |
204 | 237 | @register_vcs_handler("git", "pieces_from_vcs") |
205 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): | |
238 | def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): | |
206 | 239 | """Get version from 'git describe' in the root of the source tree. |
207 | 240 | |
208 | 241 | This only gets called if the git-archive 'subst' keywords were *not* |
209 | 242 | expanded, and _version.py hasn't already been rewritten with a short |
210 | 243 | version string, meaning we're inside a checked out source tree. |
211 | 244 | """ |
212 | if not os.path.exists(os.path.join(root, ".git")): | |
213 | if verbose: | |
214 | print("no .git in %s" % root) | |
215 | raise NotThisMethod("no .git directory") | |
216 | ||
217 | 245 | GITS = ["git"] |
246 | TAG_PREFIX_REGEX = "*" | |
218 | 247 | if sys.platform == "win32": |
219 | 248 | GITS = ["git.cmd", "git.exe"] |
249 | TAG_PREFIX_REGEX = r"\*" | |
250 | ||
251 | _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) | |
252 | if rc != 0: | |
253 | if verbose: | |
254 | print("Directory %s not under git control" % root) | |
255 | raise NotThisMethod("'git rev-parse --git-dir' returned error") | |
256 | ||
220 | 257 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] |
221 | 258 | # if there isn't one, this yields HEX[-dirty] (no NUM) |
222 | describe_out = run_command( | |
259 | describe_out, rc = runner( | |
223 | 260 | GITS, |
224 | 261 | [ |
225 | 262 | "describe", |
228 | 265 | "--always", |
229 | 266 | "--long", |
230 | 267 | "--match", |
231 | "%s*" % tag_prefix, | |
268 | "%s%s" % (tag_prefix, TAG_PREFIX_REGEX), | |
232 | 269 | ], |
233 | 270 | cwd=root, |
234 | 271 | ) |
236 | 273 | if describe_out is None: |
237 | 274 | raise NotThisMethod("'git describe' failed") |
238 | 275 | describe_out = describe_out.strip() |
239 | full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) | |
276 | full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) | |
240 | 277 | if full_out is None: |
241 | 278 | raise NotThisMethod("'git rev-parse' failed") |
242 | 279 | full_out = full_out.strip() |
245 | 282 | pieces["long"] = full_out |
246 | 283 | pieces["short"] = full_out[:7] # maybe improved later |
247 | 284 | pieces["error"] = None |
285 | ||
286 | branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) | |
287 | # --abbrev-ref was added in git-1.6.3 | |
288 | if rc != 0 or branch_name is None: | |
289 | raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") | |
290 | branch_name = branch_name.strip() | |
291 | ||
292 | if branch_name == "HEAD": | |
293 | # If we aren't exactly on a branch, pick a branch which represents | |
294 | # the current commit. If all else fails, we are on a branchless | |
295 | # commit. | |
296 | branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) | |
297 | # --contains was added in git-1.5.4 | |
298 | if rc != 0 or branches is None: | |
299 | raise NotThisMethod("'git branch --contains' returned error") | |
300 | branches = branches.split("\n") | |
301 | ||
302 | # Remove the first line if we're running detached | |
303 | if "(" in branches[0]: | |
304 | branches.pop(0) | |
305 | ||
306 | # Strip off the leading "* " from the list of branches. | |
307 | branches = [branch[2:] for branch in branches] | |
308 | if "master" in branches: | |
309 | branch_name = "master" | |
310 | elif not branches: | |
311 | branch_name = None | |
312 | else: | |
313 | # Pick the first branch that is returned. Good or bad. | |
314 | branch_name = branches[0] | |
315 | ||
316 | pieces["branch"] = branch_name | |
248 | 317 | |
249 | 318 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] |
250 | 319 | # TAG might have hyphens. |
262 | 331 | # TAG-NUM-gHEX |
263 | 332 | mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) |
264 | 333 | if not mo: |
265 | # unparseable. Maybe git-describe is misbehaving? | |
334 | # unparsable. Maybe git-describe is misbehaving? | |
266 | 335 | pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out |
267 | 336 | return pieces |
268 | 337 | |
288 | 357 | else: |
289 | 358 | # HEX: no tags |
290 | 359 | pieces["closest-tag"] = None |
291 | count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) | |
360 | count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) | |
292 | 361 | pieces["distance"] = int(count_out) # total number of commits |
362 | ||
363 | # commit date: see ISO-8601 comment in git_versions_from_keywords() | |
364 | date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() | |
365 | # Use only the last line. Previous lines may contain GPG signature | |
366 | # information. | |
367 | date = date.splitlines()[-1] | |
368 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) | |
293 | 369 | |
294 | 370 | return pieces |
295 | 371 | |
325 | 401 | return rendered |
326 | 402 | |
327 | 403 | |
404 | def render_pep440_branch(pieces): | |
405 | """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . | |
406 | ||
407 | The ".dev0" means not master branch. Note that .dev0 sorts backwards | |
408 | (a feature branch will appear "older" than the master branch). | |
409 | ||
410 | Exceptions: | |
411 | 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] | |
412 | """ | |
413 | if pieces["closest-tag"]: | |
414 | rendered = pieces["closest-tag"] | |
415 | if pieces["distance"] or pieces["dirty"]: | |
416 | if pieces["branch"] != "master": | |
417 | rendered += ".dev0" | |
418 | rendered += plus_or_dot(pieces) | |
419 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) | |
420 | if pieces["dirty"]: | |
421 | rendered += ".dirty" | |
422 | else: | |
423 | # exception #1 | |
424 | rendered = "0" | |
425 | if pieces["branch"] != "master": | |
426 | rendered += ".dev0" | |
427 | rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) | |
428 | if pieces["dirty"]: | |
429 | rendered += ".dirty" | |
430 | return rendered | |
431 | ||
432 | ||
433 | def pep440_split_post(ver): | |
434 | """Split pep440 version string at the post-release segment. | |
435 | ||
436 | Returns the release segments before the post-release and the | |
437 | post-release version number (or -1 if no post-release segment is present). | |
438 | """ | |
439 | vc = str.split(ver, ".post") | |
440 | return vc[0], int(vc[1] or 0) if len(vc) == 2 else None | |
441 | ||
442 | ||
328 | 443 | def render_pep440_pre(pieces): |
329 | """TAG[.post.devDISTANCE] -- No -dirty. | |
330 | ||
331 | Exceptions: | |
332 | 1: no tags. 0.post.devDISTANCE | |
333 | """ | |
334 | if pieces["closest-tag"]: | |
335 | rendered = pieces["closest-tag"] | |
444 | """TAG[.postN.devDISTANCE] -- No -dirty. | |
445 | ||
446 | Exceptions: | |
447 | 1: no tags. 0.post0.devDISTANCE | |
448 | """ | |
449 | if pieces["closest-tag"]: | |
336 | 450 | if pieces["distance"]: |
337 | rendered += ".post.dev%d" % pieces["distance"] | |
338 | else: | |
339 | # exception #1 | |
340 | rendered = "0.post.dev%d" % pieces["distance"] | |
451 | # update the post release segment | |
452 | tag_version, post_version = pep440_split_post(pieces["closest-tag"]) | |
453 | rendered = tag_version | |
454 | if post_version is not None: | |
455 | rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) | |
456 | else: | |
457 | rendered += ".post0.dev%d" % (pieces["distance"]) | |
458 | else: | |
459 | # no commits, use the tag as the version | |
460 | rendered = pieces["closest-tag"] | |
461 | else: | |
462 | # exception #1 | |
463 | rendered = "0.post0.dev%d" % pieces["distance"] | |
341 | 464 | return rendered |
342 | 465 | |
343 | 466 | |
365 | 488 | if pieces["dirty"]: |
366 | 489 | rendered += ".dev0" |
367 | 490 | rendered += "+g%s" % pieces["short"] |
491 | return rendered | |
492 | ||
493 | ||
494 | def render_pep440_post_branch(pieces): | |
495 | """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . | |
496 | ||
497 | The ".dev0" means not master branch. | |
498 | ||
499 | Exceptions: | |
500 | 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] | |
501 | """ | |
502 | if pieces["closest-tag"]: | |
503 | rendered = pieces["closest-tag"] | |
504 | if pieces["distance"] or pieces["dirty"]: | |
505 | rendered += ".post%d" % pieces["distance"] | |
506 | if pieces["branch"] != "master": | |
507 | rendered += ".dev0" | |
508 | rendered += plus_or_dot(pieces) | |
509 | rendered += "g%s" % pieces["short"] | |
510 | if pieces["dirty"]: | |
511 | rendered += ".dirty" | |
512 | else: | |
513 | # exception #1 | |
514 | rendered = "0.post%d" % pieces["distance"] | |
515 | if pieces["branch"] != "master": | |
516 | rendered += ".dev0" | |
517 | rendered += "+g%s" % pieces["short"] | |
518 | if pieces["dirty"]: | |
519 | rendered += ".dirty" | |
368 | 520 | return rendered |
369 | 521 | |
370 | 522 | |
438 | 590 | "full-revisionid": pieces.get("long"), |
439 | 591 | "dirty": None, |
440 | 592 | "error": pieces["error"], |
593 | "date": None, | |
441 | 594 | } |
442 | 595 | |
443 | 596 | if not style or style == "default": |
445 | 598 | |
446 | 599 | if style == "pep440": |
447 | 600 | rendered = render_pep440(pieces) |
601 | elif style == "pep440-branch": | |
602 | rendered = render_pep440_branch(pieces) | |
448 | 603 | elif style == "pep440-pre": |
449 | 604 | rendered = render_pep440_pre(pieces) |
450 | 605 | elif style == "pep440-post": |
451 | 606 | rendered = render_pep440_post(pieces) |
607 | elif style == "pep440-post-branch": | |
608 | rendered = render_pep440_post_branch(pieces) | |
452 | 609 | elif style == "pep440-old": |
453 | 610 | rendered = render_pep440_old(pieces) |
454 | 611 | elif style == "git-describe": |
463 | 620 | "full-revisionid": pieces["long"], |
464 | 621 | "dirty": pieces["dirty"], |
465 | 622 | "error": None, |
623 | "date": pieces.get("date"), | |
466 | 624 | } |
467 | 625 | |
468 | 626 | |
486 | 644 | # versionfile_source is the relative path from the top of the source |
487 | 645 | # tree (where the .git directory might live) to this file. Invert |
488 | 646 | # this to find the root from __file__. |
489 | for i in cfg.versionfile_source.split("/"): | |
647 | for _ in cfg.versionfile_source.split("/"): | |
490 | 648 | root = os.path.dirname(root) |
491 | 649 | except NameError: |
492 | 650 | return { |
494 | 652 | "full-revisionid": None, |
495 | 653 | "dirty": None, |
496 | 654 | "error": "unable to find root of source tree", |
655 | "date": None, | |
497 | 656 | } |
498 | 657 | |
499 | 658 | try: |
513 | 672 | "full-revisionid": None, |
514 | 673 | "dirty": None, |
515 | 674 | "error": "unable to compute version", |
675 | "date": None, | |
516 | 676 | } |
0 | from collections.abc import Iterable | |
1 | 0 | import numbers |
2 | 1 | import operator |
3 | 2 | import warnings |
358 | 357 | if isinstance(idx, numbers.Integral): |
359 | 358 | return _geom_to_shapely(self.data[idx]) |
360 | 359 | # array-like, slice |
361 | if compat.PANDAS_GE_10: | |
362 | # for pandas >= 1.0, validate and convert IntegerArray/BooleanArray | |
363 | # to numpy array, pass-through non-array-like indexers | |
364 | idx = pd.api.indexers.check_array_indexer(self, idx) | |
365 | if isinstance(idx, (Iterable, slice)): | |
366 | return GeometryArray(self.data[idx], crs=self.crs) | |
367 | else: | |
368 | raise TypeError("Index type not supported", idx) | |
360 | # validate and convert IntegerArray/BooleanArray | |
361 | # to numpy array, pass-through non-array-like indexers | |
362 | idx = pd.api.indexers.check_array_indexer(self, idx) | |
363 | return GeometryArray(self.data[idx], crs=self.crs) | |
369 | 364 | |
370 | 365 | def __setitem__(self, key, value): |
371 | if compat.PANDAS_GE_10: | |
372 | # for pandas >= 1.0, validate and convert IntegerArray/BooleanArray | |
373 | # keys to numpy array, pass-through non-array-like indexers | |
374 | key = pd.api.indexers.check_array_indexer(self, key) | |
366 | # validate and convert IntegerArray/BooleanArray | |
367 | # keys to numpy array, pass-through non-array-like indexers | |
368 | key = pd.api.indexers.check_array_indexer(self, key) | |
375 | 369 | if isinstance(value, pd.Series): |
376 | 370 | value = value.values |
371 | if isinstance(value, pd.DataFrame): | |
372 | value = value.values.flatten() | |
377 | 373 | if isinstance(value, (list, np.ndarray)): |
378 | 374 | value = from_shapely(value) |
379 | 375 | if isinstance(value, GeometryArray): |
419 | 415 | return self.__dict__ |
420 | 416 | |
421 | 417 | def __setstate__(self, state): |
422 | if compat.USE_PYGEOS: | |
423 | geoms = pygeos.from_wkb(state[0]) | |
418 | if not isinstance(state, dict): | |
419 | # pickle file saved with pygeos | |
420 | geoms = vectorized.from_wkb(state[0]) | |
424 | 421 | self._crs = state[1] |
425 | 422 | self._sindex = None # pygeos.STRtree could not be pickled yet |
426 | 423 | self.data = geoms |
427 | 424 | self.base = None |
428 | 425 | else: |
426 | if compat.USE_PYGEOS: | |
427 | state["data"] = vectorized.from_shapely(state["data"]) | |
429 | 428 | if "_crs" not in state: |
430 | 429 | state["_crs"] = None |
431 | 430 | self.__dict__.update(state) |
560 | 559 | return self.geom_equals_exact(other, 0.5 * 10 ** (-decimal)) |
561 | 560 | # return _binary_predicate("almost_equals", self, other, decimal=decimal) |
562 | 561 | |
563 | def equals_exact(self, other, tolerance): | |
564 | warnings.warn( | |
565 | "GeometryArray.equals_exact() is now GeometryArray.geom_equals_exact(). " | |
566 | "GeometryArray.equals_exact() will be deprecated in the future.", | |
567 | FutureWarning, | |
568 | stacklevel=2, | |
569 | ) | |
570 | return self._binary_method("equals_exact", self, other, tolerance=tolerance) | |
571 | ||
572 | def almost_equals(self, other, decimal): | |
573 | warnings.warn( | |
574 | "GeometryArray.almost_equals() is now GeometryArray.geom_almost_equals(). " | |
575 | "GeometryArray.almost_equals() will be deprecated in the future.", | |
576 | FutureWarning, | |
577 | stacklevel=2, | |
578 | ) | |
579 | return self.geom_equals_exact(other, 0.5 * 10 ** (-decimal)) | |
580 | ||
581 | 562 | # |
582 | 563 | # Binary operations that return new geometries |
583 | 564 | # |
565 | ||
566 | def clip_by_rect(self, xmin, ymin, xmax, ymax): | |
567 | return GeometryArray( | |
568 | vectorized.clip_by_rect(self.data, xmin, ymin, xmax, ymax), crs=self.crs | |
569 | ) | |
584 | 570 | |
585 | 571 | def difference(self, other): |
586 | 572 | return GeometryArray( |
739 | 725 | |
740 | 726 | >>> a = a.to_crs(3857) |
741 | 727 | >>> to_wkt(a) |
742 | array(['POINT (111319 111325)', 'POINT (222639 222684)', | |
743 | 'POINT (333958 334111)'], dtype=object) | |
728 | array(['POINT (111319.490793 111325.142866)', | |
729 | 'POINT (222638.981587 222684.208506)', | |
730 | 'POINT (333958.47238 334111.171402)'], dtype=object) | |
744 | 731 | >>> a.crs # doctest: +SKIP |
745 | 732 | <Projected CRS: EPSG:3857> |
746 | 733 | Name: WGS 84 / Pseudo-Mercator |
878 | 865 | def x(self): |
879 | 866 | """Return the x location of point geometries in a GeoSeries""" |
880 | 867 | if (self.geom_type[~self.isna()] == "Point").all(): |
881 | return vectorized.get_x(self.data) | |
868 | empty = self.is_empty | |
869 | if empty.any(): | |
870 | nonempty = ~empty | |
871 | coords = np.full_like(nonempty, dtype=float, fill_value=np.nan) | |
872 | coords[nonempty] = vectorized.get_x(self.data[nonempty]) | |
873 | return coords | |
874 | else: | |
875 | return vectorized.get_x(self.data) | |
882 | 876 | else: |
883 | 877 | message = "x attribute access only provided for Point geometries" |
884 | 878 | raise ValueError(message) |
887 | 881 | def y(self): |
888 | 882 | """Return the y location of point geometries in a GeoSeries""" |
889 | 883 | if (self.geom_type[~self.isna()] == "Point").all(): |
890 | return vectorized.get_y(self.data) | |
884 | empty = self.is_empty | |
885 | if empty.any(): | |
886 | nonempty = ~empty | |
887 | coords = np.full_like(nonempty, dtype=float, fill_value=np.nan) | |
888 | coords[nonempty] = vectorized.get_y(self.data[nonempty]) | |
889 | return coords | |
890 | else: | |
891 | return vectorized.get_y(self.data) | |
891 | 892 | else: |
892 | 893 | message = "y attribute access only provided for Point geometries" |
893 | 894 | raise ValueError(message) |
896 | 897 | def z(self): |
897 | 898 | """Return the z location of point geometries in a GeoSeries""" |
898 | 899 | if (self.geom_type[~self.isna()] == "Point").all(): |
899 | return vectorized.get_z(self.data) | |
900 | empty = self.is_empty | |
901 | if empty.any(): | |
902 | nonempty = ~empty | |
903 | coords = np.full_like(nonempty, dtype=float, fill_value=np.nan) | |
904 | coords[nonempty] = vectorized.get_z(self.data[nonempty]) | |
905 | return coords | |
906 | else: | |
907 | return vectorized.get_z(self.data) | |
900 | 908 | else: |
901 | 909 | message = "z attribute access only provided for Point geometries" |
902 | 910 | raise ValueError(message) |
1044 | 1052 | dtype |
1045 | 1053 | ): |
1046 | 1054 | string_values = to_wkt(self) |
1047 | if compat.PANDAS_GE_10: | |
1048 | pd_dtype = pd.api.types.pandas_dtype(dtype) | |
1049 | if isinstance(pd_dtype, pd.StringDtype): | |
1050 | # ensure to return a pandas string array instead of numpy array | |
1051 | return pd.array(string_values, dtype=pd_dtype) | |
1055 | pd_dtype = pd.api.types.pandas_dtype(dtype) | |
1056 | if isinstance(pd_dtype, pd.StringDtype): | |
1057 | # ensure to return a pandas string array instead of numpy array | |
1058 | return pd.array(string_values, dtype=pd_dtype) | |
1052 | 1059 | return string_values.astype(dtype, copy=False) |
1053 | 1060 | else: |
1054 | 1061 | return np.array(self, dtype=dtype, copy=copy) |
1181 | 1188 | Returns |
1182 | 1189 | ------- |
1183 | 1190 | values : ndarray |
1184 | An array suitable for factoraization. This should maintain order | |
1191 | An array suitable for factorization. This should maintain order | |
1185 | 1192 | and be a supported dtype (Float64, Int64, UInt64, String, Object). |
1186 | 1193 | By default, the extension array is cast to object dtype. |
1187 | 1194 | na_value : object |
973 | 973 | other : GeoSeries or geometric object |
974 | 974 | The GeoSeries (elementwise) or geometric object to compare to. |
975 | 975 | decimal : int |
976 | Decimal place presion used when testing for approximate equality. | |
976 | Decimal place precision used when testing for approximate equality. | |
977 | 977 | align : bool (default True) |
978 | 978 | If True, automatically aligns GeoSeries based on their indices. |
979 | 979 | If False, the order of elements is preserved. |
1041 | 1041 | other : GeoSeries or geometric object |
1042 | 1042 | The GeoSeries (elementwise) or geometric object to compare to. |
1043 | 1043 | tolerance : float |
1044 | Decimal place presion used when testing for approximate equality. | |
1044 | Decimal place precision used when testing for approximate equality. | |
1045 | 1045 | align : bool (default True) |
1046 | 1046 | If True, automatically aligns GeoSeries based on their indices. |
1047 | 1047 | If False, the order of elements is preserved. |
2534 | 2534 | GeoSeries.union |
2535 | 2535 | """ |
2536 | 2536 | return _binary_geo("intersection", self, other, align) |
2537 | ||
2538 | def clip_by_rect(self, xmin, ymin, xmax, ymax): | |
2539 | """Returns a ``GeoSeries`` of the portions of geometry within the given | |
2540 | rectangle. | |
2541 | ||
2542 | Note that the results are not exactly equal to | |
2543 | :meth:`~GeoSeries.intersection()`. E.g. in edge cases, | |
2544 | :meth:`~GeoSeries.clip_by_rect()` will not return a point just touching the | |
2545 | rectangle. Check the examples section below for some of these exceptions. | |
2546 | ||
2547 | The geometry is clipped in a fast but possibly dirty way. The output is not | |
2548 | guaranteed to be valid. No exceptions will be raised for topological errors. | |
2549 | ||
2550 | Note: empty geometries or geometries that do not overlap with the specified | |
2551 | bounds will result in ``GEOMETRYCOLLECTION EMPTY``. | |
2552 | ||
2553 | Parameters | |
2554 | ---------- | |
2555 | xmin: float | |
2556 | Minimum x value of the rectangle | |
2557 | ymin: float | |
2558 | Minimum y value of the rectangle | |
2559 | xmax: float | |
2560 | Maximum x value of the rectangle | |
2561 | ymax: float | |
2562 | Maximum y value of the rectangle | |
2563 | ||
2564 | Returns | |
2565 | ------- | |
2566 | GeoSeries | |
2567 | ||
2568 | Examples | |
2569 | -------- | |
2570 | >>> from shapely.geometry import Polygon, LineString, Point | |
2571 | >>> s = geopandas.GeoSeries( | |
2572 | ... [ | |
2573 | ... Polygon([(0, 0), (2, 2), (0, 2)]), | |
2574 | ... Polygon([(0, 0), (2, 2), (0, 2)]), | |
2575 | ... LineString([(0, 0), (2, 2)]), | |
2576 | ... LineString([(2, 0), (0, 2)]), | |
2577 | ... Point(0, 1), | |
2578 | ... ], | |
2579 | ... crs=3857, | |
2580 | ... ) | |
2581 | >>> bounds = (0, 0, 1, 1) | |
2582 | >>> s | |
2583 | 0 POLYGON ((0.000 0.000, 2.000 2.000, 0.000 2.00... | |
2584 | 1 POLYGON ((0.000 0.000, 2.000 2.000, 0.000 2.00... | |
2585 | 2 LINESTRING (0.000 0.000, 2.000 2.000) | |
2586 | 3 LINESTRING (2.000 0.000, 0.000 2.000) | |
2587 | 4 POINT (0.000 1.000) | |
2588 | dtype: geometry | |
2589 | >>> s.clip_by_rect(*bounds) | |
2590 | 0 POLYGON ((0.000 0.000, 0.000 1.000, 1.000 1.00... | |
2591 | 1 POLYGON ((0.000 0.000, 0.000 1.000, 1.000 1.00... | |
2592 | 2 LINESTRING (0.000 0.000, 1.000 1.000) | |
2593 | 3 GEOMETRYCOLLECTION EMPTY | |
2594 | 4 GEOMETRYCOLLECTION EMPTY | |
2595 | dtype: geometry | |
2596 | ||
2597 | See also | |
2598 | -------- | |
2599 | GeoSeries.intersection | |
2600 | """ | |
2601 | from .geoseries import GeoSeries | |
2602 | ||
2603 | geometry_array = GeometryArray(self.geometry.values) | |
2604 | clipped_geometry = geometry_array.clip_by_rect(xmin, ymin, xmax, ymax) | |
2605 | return GeoSeries(clipped_geometry.data, index=self.index, crs=self.crs) | |
2537 | 2606 | |
2538 | 2607 | # |
2539 | 2608 | # Other operations |
24 | 24 | |
25 | 25 | This dataset is being provided by the Department of City Planning (DCP) on DCP’s website for informational purposes only. DCP does not warranty the completeness, accuracy, content, or fitness for any particular purpose or use of the dataset, nor are any such warranties to be implied or inferred with respect to the dataset as furnished on the website. DCP and the City are not liable for any deficiencies in the completeness, accuracy, content, or fitness for any particular purpose or use the dataset, or applications utilizing the dataset, provided by any third party. |
26 | 26 | |
27 | ### `naturalearth_lowres` | |
27 | 28 | |
29 | #### Notes | |
30 | ||
31 | - `gdp_md_est` is `GDP_MD` in source data set | |
32 | - `iso_a3` have been overridden with `ADM0_A3` if source value was **-99** and row corresponds to **Sovereign country**, or **Country** |
0 | 0 | """ |
1 | 1 | Script that generates the included dataset 'naturalearth_lowres.shp'. |
2 | 2 | |
3 | Raw data: https://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-admin-0-countries/ | |
4 | Current version used: version 4.1.0 | |
3 | Raw data: https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/110m/cultural/ne_110m_admin_0_countries.zip | |
4 | Current version used: version 5.0.1 | |
5 | 5 | """ # noqa (E501 link is longer than max line length) |
6 | 6 | |
7 | 7 | import geopandas as gpd |
8 | 8 | |
9 | 9 | # assumes zipfile from naturalearthdata was downloaded to current directory |
10 | 10 | world_raw = gpd.read_file("zip://./ne_110m_admin_0_countries.zip") |
11 | ||
12 | # not ideal - fix some country codes | |
13 | mask = world_raw["ISO_A3"].eq("-99") & world_raw["TYPE"].isin( | |
14 | ["Sovereign country", "Country"] | |
15 | ) | |
16 | world_raw.loc[mask, "ISO_A3"] = world_raw.loc[mask, "ADM0_A3"] | |
17 | ||
11 | 18 | # subsets columns of interest for geopandas examples |
12 | 19 | world_df = world_raw[ |
13 | ["POP_EST", "CONTINENT", "NAME", "ISO_A3", "GDP_MD_EST", "geometry"] | |
14 | ] | |
20 | ["POP_EST", "CONTINENT", "NAME", "ISO_A3", "GDP_MD", "geometry"] | |
21 | ].rename( | |
22 | columns={"GDP_MD": "GDP_MD_EST"} | |
23 | ) # column has changed name... | |
15 | 24 | world_df.columns = world_df.columns.str.lower() |
25 | ||
16 | 26 | world_df.to_file( |
17 | 27 | driver="ESRI Shapefile", filename="./naturalearth_lowres/naturalearth_lowres.shp" |
18 | 28 | ) |
Binary diff not shown
0 | GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]⏎ | |
0 | GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]⏎ |
56 | 56 | tooltip_kwds={}, |
57 | 57 | popup_kwds={}, |
58 | 58 | legend_kwds={}, |
59 | map_kwds={}, | |
59 | 60 | **kwargs, |
60 | 61 | ): |
61 | 62 | """Interactive map based on GeoPandas and folium/leaflet.js |
181 | 182 | Fill color. Defaults to the value of the color option |
182 | 183 | fillOpacity : float (default 0.5) |
183 | 184 | Fill opacity. |
185 | style_function : callable | |
186 | Function mapping a GeoJson Feature to a style ``dict``. | |
187 | ||
188 | * Style properties :func:`folium.vector_layers.path_options` | |
189 | * GeoJson features :class:`GeoDataFrame.__geo_interface__` | |
190 | ||
191 | e.g.:: | |
192 | ||
193 | lambda x: {"color":"red" if x["properties"]["gdp_md_est"]<10**6 | |
194 | else "blue"} | |
184 | 195 | |
185 | 196 | Plus all supported by :func:`folium.vector_layers.path_options`. See the |
186 | 197 | documentation of :class:`folium.features.GeoJson` for details. |
224 | 235 | Applies if ``colorbar=False``. |
225 | 236 | max_labels : int, default 10 |
226 | 237 | Maximum number of colorbar tick labels (requires branca>=0.5.0) |
238 | map_kwds : dict (default {}) | |
239 | Additional keywords to be passed to folium :class:`~folium.folium.Map`, | |
240 | e.g. ``dragging``, or ``scrollWheelZoom``. | |
241 | ||
227 | 242 | |
228 | 243 | **kwargs : dict |
229 | 244 | Additional options to be passed on to the folium object. |
301 | 316 | fit = False |
302 | 317 | |
303 | 318 | # get a subset of kwargs to be passed to folium.Map |
304 | map_kwds = {i: kwargs[i] for i in kwargs.keys() if i in _MAP_KWARGS} | |
319 | for i in _MAP_KWARGS: | |
320 | if i in map_kwds: | |
321 | raise ValueError( | |
322 | f"'{i}' cannot be specified in 'map_kwds'. " | |
323 | f"Use the '{i}={map_kwds[i]}' argument instead." | |
324 | ) | |
325 | map_kwds = { | |
326 | **map_kwds, | |
327 | **{i: kwargs[i] for i in kwargs.keys() if i in _MAP_KWARGS}, | |
328 | } | |
305 | 329 | |
306 | 330 | if HAS_XYZSERVICES: |
307 | 331 | # match provider name string to xyzservices.TileProvider |
352 | 376 | "Cannot specify 'categories' when column has categorical dtype" |
353 | 377 | ) |
354 | 378 | categorical = True |
355 | elif gdf[column].dtype is np.dtype("O") or categories: | |
379 | elif ( | |
380 | gdf[column].dtype is np.dtype("O") | |
381 | or gdf[column].dtype is np.dtype(bool) | |
382 | or categories | |
383 | ): | |
356 | 384 | categorical = True |
357 | 385 | |
358 | 386 | nan_idx = pd.isna(gdf[column]) |
429 | 457 | style_kwds["fillOpacity"] = 0.5 |
430 | 458 | if "weight" not in style_kwds: |
431 | 459 | style_kwds["weight"] = 2 |
460 | if "style_function" in style_kwds: | |
461 | style_kwds_function = style_kwds["style_function"] | |
462 | if not callable(style_kwds_function): | |
463 | raise ValueError("'style_function' has to be a callable") | |
464 | style_kwds.pop("style_function") | |
465 | else: | |
466 | ||
467 | def _no_style(x): | |
468 | return {} | |
469 | ||
470 | style_kwds_function = _no_style | |
432 | 471 | |
433 | 472 | # specify color |
434 | 473 | if color is not None: |
439 | 478 | ): # use existing column |
440 | 479 | |
441 | 480 | def _style_color(x): |
442 | return { | |
481 | base_style = { | |
443 | 482 | "fillColor": x["properties"][color], |
444 | 483 | **style_kwds, |
484 | } | |
485 | return { | |
486 | **base_style, | |
487 | **style_kwds_function(x), | |
445 | 488 | } |
446 | 489 | |
447 | 490 | style_function = _style_color |
461 | 504 | if not stroke_color: |
462 | 505 | |
463 | 506 | def _style_column(x): |
464 | return { | |
507 | base_style = { | |
465 | 508 | "fillColor": x["properties"]["__folium_color"], |
466 | 509 | "color": x["properties"]["__folium_color"], |
467 | 510 | **style_kwds, |
468 | 511 | } |
512 | return { | |
513 | **base_style, | |
514 | **style_kwds_function(x), | |
515 | } | |
469 | 516 | |
470 | 517 | style_function = _style_column |
471 | 518 | else: |
472 | 519 | |
473 | 520 | def _style_stroke(x): |
474 | return { | |
521 | base_style = { | |
475 | 522 | "fillColor": x["properties"]["__folium_color"], |
476 | 523 | "color": stroke_color, |
477 | 524 | **style_kwds, |
478 | 525 | } |
526 | return { | |
527 | **base_style, | |
528 | **style_kwds_function(x), | |
529 | } | |
479 | 530 | |
480 | 531 | style_function = _style_stroke |
481 | 532 | else: # use folium default |
482 | 533 | |
483 | 534 | def _style_default(x): |
484 | return {**style_kwds} | |
535 | return {**style_kwds, **style_kwds_function(x)} | |
485 | 536 | |
486 | 537 | style_function = _style_default |
487 | 538 | |
525 | 576 | ] |
526 | 577 | gdf = gdf.drop(columns=non_active_geoms) |
527 | 578 | |
528 | # preprare tooltip and popup | |
579 | # prepare tooltip and popup | |
529 | 580 | if isinstance(gdf, geopandas.GeoDataFrame): |
530 | 581 | # add named index to the tooltip |
531 | 582 | if gdf.index.name is not None: |
795 | 846 | marker_kwds={}, |
796 | 847 | style_kwds={}, |
797 | 848 | highlight_kwds={}, |
849 | map_kwds={}, | |
798 | 850 | **kwargs, |
799 | 851 | ): |
800 | 852 | """Interactive map based on GeoPandas and folium/leaflet.js |
865 | 917 | Fill color. Defaults to the value of the color option |
866 | 918 | fillOpacity : float (default 0.5) |
867 | 919 | Fill opacity. |
920 | style_function : callable | |
921 | Function mapping a GeoJson Feature to a style ``dict``. | |
922 | ||
923 | * Style properties :func:`folium.vector_layers.path_options` | |
924 | * GeoJson features :class:`GeoSeries.__geo_interface__` | |
925 | ||
926 | e.g.:: | |
927 | ||
928 | lambda x: {"color":"red" if x["properties"]["gdp_md_est"]<10**6 | |
929 | else "blue"} | |
930 | ||
868 | 931 | |
869 | 932 | Plus all supported by :func:`folium.vector_layers.path_options`. See the |
870 | 933 | documentation of :class:`folium.features.GeoJson` for details. |
872 | 935 | highlight_kwds : dict (default {}) |
873 | 936 | Style to be passed to folium highlight_function. Uses the same keywords |
874 | 937 | as ``style_kwds``. When empty, defaults to ``{"fillOpacity": 0.75}``. |
938 | map_kwds : dict (default {}) | |
939 | Additional keywords to be passed to folium :class:`~folium.folium.Map`, | |
940 | e.g. ``dragging``, or ``scrollWheelZoom``. | |
875 | 941 | |
876 | 942 | **kwargs : dict |
877 | 943 | Additional options to be passed on to the folium. |
896 | 962 | marker_kwds=marker_kwds, |
897 | 963 | style_kwds=style_kwds, |
898 | 964 | highlight_kwds=highlight_kwds, |
965 | map_kwds=map_kwds, | |
899 | 966 | **kwargs, |
900 | 967 | ) |
22 | 22 | DEFAULT_GEO_COLUMN_NAME = "geometry" |
23 | 23 | |
24 | 24 | |
25 | def _geodataframe_constructor_with_fallback(*args, **kwargs): | |
26 | """ | |
27 | A flexible constructor for GeoDataFrame._constructor, which falls back | |
28 | to returning a DataFrame (if a certain operation does not preserve the | |
29 | geometry column) | |
30 | """ | |
31 | df = GeoDataFrame(*args, **kwargs) | |
32 | geometry_cols_mask = df.dtypes == "geometry" | |
33 | if len(geometry_cols_mask) == 0 or geometry_cols_mask.sum() == 0: | |
34 | df = pd.DataFrame(df) | |
35 | ||
36 | return df | |
37 | ||
38 | ||
25 | 39 | def _ensure_geometry(data, crs=None): |
26 | 40 | """ |
27 | 41 | Ensure the data is of geometry dtype or converted to it. |
34 | 48 | if is_geometry_type(data): |
35 | 49 | if isinstance(data, Series): |
36 | 50 | data = GeoSeries(data) |
37 | if data.crs is None: | |
51 | if data.crs is None and crs is not None: | |
52 | # Avoids caching issues/crs sharing issues | |
53 | data = data.copy() | |
38 | 54 | data.crs = crs |
39 | 55 | return data |
40 | 56 | else: |
46 | 62 | return out |
47 | 63 | |
48 | 64 | |
49 | def _crs_mismatch_warning(): | |
50 | # TODO: raise error in 0.9 or 0.10. | |
51 | warnings.warn( | |
52 | "CRS mismatch between CRS of the passed geometries " | |
53 | "and 'crs'. Use 'GeoDataFrame.set_crs(crs, " | |
54 | "allow_override=True)' to overwrite CRS or " | |
55 | "'GeoDataFrame.to_crs(crs)' to reproject geometries. " | |
56 | "CRS mismatch will raise an error in the future versions " | |
57 | "of GeoPandas.", | |
58 | FutureWarning, | |
59 | stacklevel=3, | |
60 | ) | |
65 | crs_mismatch_error = ( | |
66 | "CRS mismatch between CRS of the passed geometries " | |
67 | "and 'crs'. Use 'GeoDataFrame.set_crs(crs, " | |
68 | "allow_override=True)' to overwrite CRS or " | |
69 | "'GeoDataFrame.to_crs(crs)' to reproject geometries. " | |
70 | ) | |
61 | 71 | |
62 | 72 | |
63 | 73 | class GeoDataFrame(GeoPandasBase, DataFrame): |
112 | 122 | GeoSeries : Series object designed to store shapely geometry objects |
113 | 123 | """ |
114 | 124 | |
125 | # TODO: remove "_crs" in 0.12 | |
115 | 126 | _metadata = ["_crs", "_geometry_column_name"] |
116 | 127 | |
117 | 128 | _geometry_column_name = DEFAULT_GEO_COLUMN_NAME |
120 | 131 | with compat.ignore_shapely2_warnings(): |
121 | 132 | super().__init__(data, *args, **kwargs) |
122 | 133 | |
123 | # need to set this before calling self['geometry'], because | |
124 | # getitem accesses crs | |
125 | self._crs = CRS.from_user_input(crs) if crs else None | |
134 | # TODO: to be removed in 0.12 | |
135 | self._crs = None | |
126 | 136 | |
127 | 137 | # set_geometry ensures the geometry data have the proper dtype, |
128 | 138 | # but is not called if `geometry=None` ('geometry' column present |
135 | 145 | if geometry is None and isinstance(data, GeoDataFrame): |
136 | 146 | self._geometry_column_name = data._geometry_column_name |
137 | 147 | if crs is not None and data.crs != crs: |
138 | _crs_mismatch_warning() | |
139 | # TODO: raise error in 0.9 or 0.10. | |
140 | return | |
148 | raise ValueError(crs_mismatch_error) | |
141 | 149 | |
142 | 150 | if geometry is None and "geometry" in self.columns: |
143 | 151 | # Check for multiple columns with name "geometry". If there are, |
150 | 158 | ) |
151 | 159 | |
152 | 160 | # only if we have actual geometry values -> call set_geometry |
153 | index = self.index | |
154 | 161 | try: |
155 | 162 | if ( |
156 | 163 | hasattr(self["geometry"].values, "crs") |
158 | 165 | and crs |
159 | 166 | and not self["geometry"].values.crs == crs |
160 | 167 | ): |
161 | _crs_mismatch_warning() | |
162 | # TODO: raise error in 0.9 or 0.10. | |
168 | raise ValueError(crs_mismatch_error) | |
163 | 169 | self["geometry"] = _ensure_geometry(self["geometry"].values, crs) |
164 | 170 | except TypeError: |
165 | 171 | pass |
166 | 172 | else: |
167 | if self.index is not index: | |
168 | # With pandas < 1.0 and an empty frame (no rows), the index | |
169 | # gets reset to a default RangeIndex -> set back the original | |
170 | # index if needed | |
171 | self.index = index | |
172 | 173 | geometry = "geometry" |
173 | 174 | |
174 | 175 | if geometry is not None: |
178 | 179 | and crs |
179 | 180 | and not geometry.crs == crs |
180 | 181 | ): |
181 | _crs_mismatch_warning() | |
182 | # TODO: raise error in 0.9 or 0.10. | |
183 | self.set_geometry(geometry, inplace=True) | |
182 | raise ValueError(crs_mismatch_error) | |
183 | ||
184 | self.set_geometry(geometry, inplace=True, crs=crs) | |
184 | 185 | |
185 | 186 | if geometry is None and crs: |
186 | warnings.warn( | |
187 | "Assigning CRS to a GeoDataFrame without a geometry column is now " | |
188 | "deprecated and will not be supported in the future.", | |
189 | FutureWarning, | |
190 | stacklevel=2, | |
187 | raise ValueError( | |
188 | "Assigning CRS to a GeoDataFrame without a geometry column is not " | |
189 | "supported. Supply geometry using the 'geometry=' keyword argument, " | |
190 | "or by providing a DataFrame with column name 'geometry'", | |
191 | 191 | ) |
192 | 192 | |
193 | 193 | def __setattr__(self, attr, val): |
199 | 199 | |
200 | 200 | def _get_geometry(self): |
201 | 201 | if self._geometry_column_name not in self: |
202 | raise AttributeError( | |
203 | "No geometry data set yet (expected in" | |
204 | " column '%s'.)" % self._geometry_column_name | |
205 | ) | |
202 | if self._geometry_column_name is None: | |
203 | msg = ( | |
204 | "You are calling a geospatial method on the GeoDataFrame, " | |
205 | "but the active geometry column to use has not been set. " | |
206 | ) | |
207 | else: | |
208 | msg = ( | |
209 | "You are calling a geospatial method on the GeoDataFrame, " | |
210 | f"but the active geometry column ('{self._geometry_column_name}') " | |
211 | "is not present. " | |
212 | ) | |
213 | geo_cols = list(self.columns[self.dtypes == "geometry"]) | |
214 | if len(geo_cols) > 0: | |
215 | msg += ( | |
216 | f"\nThere are columns with geometry data type ({geo_cols}), and " | |
217 | "you can either set one as the active geometry with " | |
218 | 'df.set_geometry("name") or access the column as a ' | |
219 | 'GeoSeries (df["name"]) and call the method directly on it.' | |
220 | ) | |
221 | else: | |
222 | msg += ( | |
223 | "\nThere are no existing columns with geometry data type. You can " | |
224 | "add a geometry column as the active geometry column with " | |
225 | "df.set_geometry. " | |
226 | ) | |
227 | ||
228 | raise AttributeError(msg) | |
206 | 229 | return self[self._geometry_column_name] |
207 | 230 | |
208 | 231 | def _set_geometry(self, col): |
275 | 298 | frame = self |
276 | 299 | else: |
277 | 300 | frame = self.copy() |
301 | # if there is no previous self.geometry, self.copy() will downcast | |
302 | if type(frame) == DataFrame: | |
303 | frame = GeoDataFrame(frame) | |
278 | 304 | |
279 | 305 | to_remove = None |
280 | 306 | geo_column_name = self._geometry_column_name |
281 | 307 | if isinstance(col, (Series, list, np.ndarray, GeometryArray)): |
282 | 308 | level = col |
283 | elif hasattr(col, "ndim") and col.ndim != 1: | |
309 | elif hasattr(col, "ndim") and col.ndim > 1: | |
284 | 310 | raise ValueError("Must pass array with one dimension only.") |
285 | 311 | else: |
286 | 312 | try: |
305 | 331 | del frame[to_remove] |
306 | 332 | |
307 | 333 | if not crs: |
308 | level_crs = getattr(level, "crs", None) | |
309 | crs = level_crs if level_crs is not None else self._crs | |
334 | crs = getattr(level, "crs", None) | |
310 | 335 | |
311 | 336 | if isinstance(level, (GeoSeries, GeometryArray)) and level.crs != crs: |
312 | 337 | # Avoids caching issues/crs sharing issues |
315 | 340 | |
316 | 341 | # Check that we are using a listlike of geometries |
317 | 342 | level = _ensure_geometry(level, crs=crs) |
318 | index = frame.index | |
319 | 343 | frame[geo_column_name] = level |
320 | if frame.index is not index and len(frame.index) == len(index): | |
321 | # With pandas < 1.0 and an empty frame (no rows), the index gets reset | |
322 | # to a default RangeIndex -> set back the original index if needed | |
323 | frame.index = index | |
324 | 344 | frame._geometry_column_name = geo_column_name |
325 | frame.crs = crs | |
345 | ||
346 | # TODO: to be removed in 0.12 | |
347 | frame._crs = level.crs | |
326 | 348 | if not inplace: |
327 | 349 | return frame |
328 | 350 | |
404 | 426 | GeoDataFrame.to_crs : re-project to another CRS |
405 | 427 | |
406 | 428 | """ |
407 | return self._crs | |
429 | # TODO: remove try/except in 0.12 | |
430 | try: | |
431 | return self.geometry.crs | |
432 | except AttributeError: | |
433 | # the active geometry column might not be set | |
434 | warnings.warn( | |
435 | "Accessing CRS of a GeoDataFrame without a geometry column is " | |
436 | "deprecated and will be removed in GeoPandas 0.12. " | |
437 | "Use GeoDataFrame.set_geometry to set the active geometry column.", | |
438 | FutureWarning, | |
439 | stacklevel=2, | |
440 | ) | |
441 | return self._crs | |
408 | 442 | |
409 | 443 | @crs.setter |
410 | 444 | def crs(self, value): |
411 | 445 | """Sets the value of the crs""" |
412 | 446 | if self._geometry_column_name not in self: |
413 | warnings.warn( | |
414 | "Assigning CRS to a GeoDataFrame without a geometry column is now " | |
415 | "deprecated and will not be supported in the future.", | |
416 | FutureWarning, | |
417 | stacklevel=4, | |
447 | raise ValueError( | |
448 | "Assigning CRS to a GeoDataFrame without a geometry column is not " | |
449 | "supported. Use GeoDataFrame.set_geometry to set the active " | |
450 | "geometry column.", | |
418 | 451 | ) |
419 | self._crs = None if not value else CRS.from_user_input(value) | |
420 | 452 | else: |
421 | 453 | if hasattr(self.geometry.values, "crs"): |
422 | 454 | self.geometry.values.crs = value |
423 | self._crs = self.geometry.values.crs | |
424 | 455 | else: |
425 | 456 | # column called 'geometry' without geometry |
426 | 457 | self._crs = None if not value else CRS.from_user_input(value) |
427 | 458 | |
459 | # TODO: raise this error in 0.12. This already raises a FutureWarning | |
460 | # TODO: defined in the crs property above | |
461 | # raise ValueError( | |
462 | # "Assigning CRS to a GeoDataFrame without an active geometry " | |
463 | # "column is not supported. Use GeoDataFrame.set_geometry to set " | |
464 | # "the active geometry column.", | |
465 | # ) | |
466 | ||
428 | 467 | def __setstate__(self, state): |
429 | 468 | # overriding DataFrame method for compat with older pickles (CRS handling) |
469 | crs = None | |
430 | 470 | if isinstance(state, dict): |
431 | if "_metadata" in state and "crs" in state["_metadata"]: | |
432 | metadata = state["_metadata"] | |
433 | metadata[metadata.index("crs")] = "_crs" | |
434 | 471 | if "crs" in state and "_crs" not in state: |
435 | crs = state.pop("crs") | |
436 | state["_crs"] = CRS.from_user_input(crs) if crs is not None else crs | |
472 | crs = state.pop("crs", None) | |
473 | else: | |
474 | crs = state.pop("_crs", None) | |
475 | crs = CRS.from_user_input(crs) if crs is not None else crs | |
437 | 476 | |
438 | 477 | super().__setstate__(state) |
439 | 478 | |
441 | 480 | # at GeoDataFrame level with '_crs' (and not 'crs'), so without propagating |
442 | 481 | # to the GeoSeries/GeometryArray |
443 | 482 | try: |
444 | if self.crs is not None: | |
483 | if crs is not None: | |
445 | 484 | if self.geometry.values.crs is None: |
446 | self.crs = self.crs | |
485 | self.crs = crs | |
447 | 486 | except Exception: |
448 | 487 | pass |
449 | 488 | |
470 | 509 | GeoDataFrame |
471 | 510 | |
472 | 511 | """ |
473 | dataframe = super().from_dict(data, **kwargs) | |
474 | return GeoDataFrame(dataframe, geometry=geometry, crs=crs) | |
512 | dataframe = DataFrame.from_dict(data, **kwargs) | |
513 | return cls(dataframe, geometry=geometry, crs=crs) | |
475 | 514 | |
476 | 515 | @classmethod |
477 | 516 | def from_file(cls, filename, **kwargs): |
604 | 643 | "geometry": shape(feature["geometry"]) if feature["geometry"] else None |
605 | 644 | } |
606 | 645 | # load properties |
607 | row.update(feature["properties"]) | |
646 | properties = feature["properties"] | |
647 | if properties is None: | |
648 | properties = {} | |
649 | row.update(properties) | |
608 | 650 | rows.append(row) |
609 | return GeoDataFrame(rows, columns=columns, crs=crs) | |
651 | return cls(rows, columns=columns, crs=crs) | |
610 | 652 | |
611 | 653 | @classmethod |
612 | 654 | def from_postgis( |
836 | 878 | if not self.columns.is_unique: |
837 | 879 | raise ValueError("GeoDataFrame cannot contain duplicated column names.") |
838 | 880 | |
839 | properties_cols = self.columns.difference([self._geometry_column_name]) | |
881 | properties_cols = self.columns.drop(self._geometry_column_name) | |
840 | 882 | |
841 | 883 | if len(properties_cols) > 0: |
842 | 884 | # convert to object to get python scalars. |
952 | 994 | |
953 | 995 | return df |
954 | 996 | |
955 | def to_parquet(self, path, index=None, compression="snappy", **kwargs): | |
997 | def to_parquet( | |
998 | self, path, index=None, compression="snappy", version=None, **kwargs | |
999 | ): | |
956 | 1000 | """Write a GeoDataFrame to the Parquet format. |
957 | 1001 | |
958 | 1002 | Any geometry columns present are serialized to WKB format in the file. |
959 | 1003 | |
960 | 1004 | Requires 'pyarrow'. |
961 | 1005 | |
962 | WARNING: this is an initial implementation of Parquet file support and | |
963 | associated metadata. This is tracking version 0.1.0 of the metadata | |
964 | specification at: | |
965 | https://github.com/geopandas/geo-arrow-spec | |
1006 | WARNING: this is an early implementation of Parquet file support and | |
1007 | associated metadata, the specification for which continues to evolve. | |
1008 | This is tracking version 0.4.0 of the GeoParquet specification at: | |
1009 | https://github.com/opengeospatial/geoparquet | |
966 | 1010 | |
967 | 1011 | This metadata specification does not yet make stability promises. As such, |
968 | 1012 | we do not yet recommend using this in a production setting unless you are |
981 | 1025 | output except `RangeIndex` which is stored as metadata only. |
982 | 1026 | compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' |
983 | 1027 | Name of the compression to use. Use ``None`` for no compression. |
1028 | version : {'0.1.0', '0.4.0', None} | |
1029 | GeoParquet specification version; if not provided will default to | |
1030 | latest supported version. | |
984 | 1031 | kwargs |
985 | 1032 | Additional keyword arguments passed to :func:`pyarrow.parquet.write_table`. |
986 | 1033 | |
995 | 1042 | GeoDataFrame.to_file : write GeoDataFrame to file |
996 | 1043 | """ |
997 | 1044 | |
1045 | # Accept engine keyword for compatibility with pandas.DataFrame.to_parquet | |
1046 | # The only engine currently supported by GeoPandas is pyarrow, so no | |
1047 | # other engine should be specified. | |
1048 | engine = kwargs.pop("engine", "auto") | |
1049 | if engine not in ("auto", "pyarrow"): | |
1050 | raise ValueError( | |
1051 | f"GeoPandas only supports using pyarrow as the engine for " | |
1052 | f"to_parquet: {engine!r} passed instead." | |
1053 | ) | |
1054 | ||
998 | 1055 | from geopandas.io.arrow import _to_parquet |
999 | 1056 | |
1000 | _to_parquet(self, path, compression=compression, index=index, **kwargs) | |
1001 | ||
1002 | def to_feather(self, path, index=None, compression=None, **kwargs): | |
1057 | _to_parquet( | |
1058 | self, path, compression=compression, index=index, version=version, **kwargs | |
1059 | ) | |
1060 | ||
1061 | def to_feather(self, path, index=None, compression=None, version=None, **kwargs): | |
1003 | 1062 | """Write a GeoDataFrame to the Feather format. |
1004 | 1063 | |
1005 | 1064 | Any geometry columns present are serialized to WKB format in the file. |
1006 | 1065 | |
1007 | 1066 | Requires 'pyarrow' >= 0.17. |
1008 | 1067 | |
1009 | WARNING: this is an initial implementation of Feather file support and | |
1010 | associated metadata. This is tracking version 0.1.0 of the metadata | |
1011 | specification at: | |
1012 | https://github.com/geopandas/geo-arrow-spec | |
1068 | WARNING: this is an early implementation of Parquet file support and | |
1069 | associated metadata, the specification for which continues to evolve. | |
1070 | This is tracking version 0.4.0 of the GeoParquet specification at: | |
1071 | https://github.com/opengeospatial/geoparquet | |
1013 | 1072 | |
1014 | 1073 | This metadata specification does not yet make stability promises. As such, |
1015 | 1074 | we do not yet recommend using this in a production setting unless you are |
1029 | 1088 | compression : {'zstd', 'lz4', 'uncompressed'}, optional |
1030 | 1089 | Name of the compression to use. Use ``"uncompressed"`` for no |
1031 | 1090 | compression. By default uses LZ4 if available, otherwise uncompressed. |
1091 | version : {'0.1.0', '0.4.0', None} | |
1092 | GeoParquet specification version; if not provided will default to | |
1093 | latest supported version. | |
1032 | 1094 | kwargs |
1033 | 1095 | Additional keyword arguments passed to to |
1034 | 1096 | :func:`pyarrow.feather.write_feather`. |
1046 | 1108 | |
1047 | 1109 | from geopandas.io.arrow import _to_feather |
1048 | 1110 | |
1049 | _to_feather(self, path, index=index, compression=compression, **kwargs) | |
1111 | _to_feather( | |
1112 | self, path, index=index, compression=compression, version=version, **kwargs | |
1113 | ) | |
1050 | 1114 | |
1051 | 1115 | def to_file(self, filename, driver=None, schema=None, index=None, **kwargs): |
1052 | 1116 | """Write the ``GeoDataFrame`` to a file. |
1061 | 1125 | Parameters |
1062 | 1126 | ---------- |
1063 | 1127 | filename : string |
1064 | File path or file handle to write to. | |
1128 | File path or file handle to write to. The path may specify a | |
1129 | GDAL VSI scheme. | |
1065 | 1130 | driver : string, default None |
1066 | 1131 | The OGR format driver used to write the vector file. |
1067 | 1132 | If not specified, it attempts to infer it from the file extension. |
1068 | 1133 | If no extension is specified, it saves ESRI Shapefile to a folder. |
1069 | schema : dict, default: None | |
1134 | schema : dict, default None | |
1070 | 1135 | If specified, the schema dictionary is passed to Fiona to |
1071 | better control how the file is written. | |
1136 | better control how the file is written. If None, GeoPandas | |
1137 | will determine the schema based on each column's dtype. | |
1138 | Not supported for the "pyogrio" engine. | |
1072 | 1139 | index : bool, default None |
1073 | 1140 | If True, write index into one or more columns (for MultiIndex). |
1074 | 1141 | Default None writes the index into one or more columns only if |
1077 | 1144 | |
1078 | 1145 | .. versionadded:: 0.7 |
1079 | 1146 | Previously the index was not written. |
1147 | mode : string, default 'w' | |
1148 | The write mode, 'w' to overwrite the existing file and 'a' to append. | |
1149 | Not all drivers support appending. The drivers that support appending | |
1150 | are listed in fiona.supported_drivers or | |
1151 | https://github.com/Toblerity/Fiona/blob/master/fiona/drvsupport.py | |
1152 | crs : pyproj.CRS, default None | |
1153 | If specified, the CRS is passed to Fiona to | |
1154 | better control how the file is written. If None, GeoPandas | |
1155 | will determine the crs based on crs df attribute. | |
1156 | The value can be anything accepted | |
1157 | by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`, | |
1158 | such as an authority string (eg "EPSG:4326") or a WKT string. | |
1159 | engine : str, "fiona" or "pyogrio" | |
1160 | The underlying library that is used to write the file. Currently, the | |
1161 | supported options are "fiona" and "pyogrio". Defaults to "fiona" if | |
1162 | installed, otherwise tries "pyogrio". | |
1163 | **kwargs : | |
1164 | Keyword args to be passed to the engine, and can be used to write | |
1165 | to multi-layer data, store data within archives (zip files), etc. | |
1166 | In case of the "fiona" engine, the keyword arguments are passed to | |
1167 | fiona.open`. For more information on possible keywords, type: | |
1168 | ``import fiona; help(fiona.open)``. In case of the "pyogrio" engine, | |
1169 | the keyword arguments are passed to `pyogrio.write_dataframe`. | |
1080 | 1170 | |
1081 | 1171 | Notes |
1082 | 1172 | ----- |
1083 | The extra keyword arguments ``**kwargs`` are passed to fiona.open and | |
1084 | can be used to write to multi-layer data, store data within archives | |
1085 | (zip files), etc. | |
1086 | ||
1087 | 1173 | The format drivers will attempt to detect the encoding of your data, but |
1088 | 1174 | may fail. In this case, the proper encoding can be specified explicitly |
1089 | 1175 | by using the encoding keyword parameter, e.g. ``encoding='utf-8'``. |
1273 | 1359 | df = self.copy() |
1274 | 1360 | geom = df.geometry.to_crs(crs=crs, epsg=epsg) |
1275 | 1361 | df.geometry = geom |
1276 | df.crs = geom.crs | |
1277 | 1362 | if not inplace: |
1278 | 1363 | return df |
1279 | 1364 | |
1320 | 1405 | def __getitem__(self, key): |
1321 | 1406 | """ |
1322 | 1407 | If the result is a column containing only 'geometry', return a |
1323 | GeoSeries. If it's a DataFrame with a 'geometry' column, return a | |
1324 | GeoDataFrame. | |
1408 | GeoSeries. If it's a DataFrame with any columns of GeometryDtype, | |
1409 | return a GeoDataFrame. | |
1325 | 1410 | """ |
1326 | 1411 | result = super().__getitem__(key) |
1327 | 1412 | geo_col = self._geometry_column_name |
1328 | 1413 | if isinstance(result, Series) and isinstance(result.dtype, GeometryDtype): |
1329 | 1414 | result.__class__ = GeoSeries |
1330 | elif isinstance(result, DataFrame) and geo_col in result: | |
1331 | result.__class__ = GeoDataFrame | |
1332 | result._geometry_column_name = geo_col | |
1333 | elif isinstance(result, DataFrame) and geo_col not in result: | |
1334 | result.__class__ = DataFrame | |
1415 | elif isinstance(result, DataFrame): | |
1416 | if (result.dtypes == "geometry").sum() > 0: | |
1417 | result.__class__ = GeoDataFrame | |
1418 | if geo_col in result: | |
1419 | result._geometry_column_name = geo_col | |
1420 | else: | |
1421 | result._geometry_column_name = None | |
1422 | else: | |
1423 | result.__class__ = DataFrame | |
1335 | 1424 | return result |
1336 | 1425 | |
1337 | 1426 | def __setitem__(self, key, value): |
1343 | 1432 | if pd.api.types.is_scalar(value) or isinstance(value, BaseGeometry): |
1344 | 1433 | value = [value] * self.shape[0] |
1345 | 1434 | try: |
1346 | value = _ensure_geometry(value, crs=self.crs) | |
1347 | self._crs = value.crs | |
1435 | # TODO: remove this use of _crs in 0.12 | |
1436 | warn = False | |
1437 | if not (hasattr(self, "geometry") and hasattr(self.geometry, "crs")): | |
1438 | crs = self._crs | |
1439 | warn = True | |
1440 | else: | |
1441 | crs = getattr(self, "crs", None) | |
1442 | value = _ensure_geometry(value, crs=crs) | |
1443 | if warn and crs is not None: | |
1444 | warnings.warn( | |
1445 | "Setting geometries to a GeoDataFrame without a geometry " | |
1446 | "column will currently preserve the CRS, if present. " | |
1447 | "This is deprecated, and in the future the CRS will be lost " | |
1448 | "in this case. You can use set_crs(..) on the result to " | |
1449 | "set the CRS manually.", | |
1450 | FutureWarning, | |
1451 | stacklevel=2, | |
1452 | ) | |
1348 | 1453 | except TypeError: |
1349 | 1454 | warnings.warn("Geometry column does not contain geometry.") |
1350 | 1455 | super().__setitem__(key, value) |
1389 | 1494 | result = super().apply( |
1390 | 1495 | func, axis=axis, raw=raw, result_type=result_type, args=args, **kwargs |
1391 | 1496 | ) |
1497 | # Reconstruct gdf if it was lost by apply | |
1392 | 1498 | if ( |
1393 | isinstance(result, GeoDataFrame) | |
1499 | isinstance(result, DataFrame) | |
1394 | 1500 | and self._geometry_column_name in result.columns |
1395 | and isinstance(result[self._geometry_column_name].dtype, GeometryDtype) | |
1396 | 1501 | ): |
1397 | # apply calls _constructor which resets geom col name to geometry | |
1398 | result._geometry_column_name = self._geometry_column_name | |
1399 | if self.crs is not None and result.crs is None: | |
1400 | result.set_crs(self.crs, inplace=True) | |
1502 | # axis=1 apply will split GeometryDType to object, try and cast back | |
1503 | try: | |
1504 | result = result.set_geometry(self._geometry_column_name) | |
1505 | except TypeError: | |
1506 | pass | |
1507 | else: | |
1508 | if self.crs is not None and result.crs is None: | |
1509 | result.set_crs(self.crs, inplace=True) | |
1510 | elif isinstance(result, Series): | |
1511 | # Reconstruct series GeometryDtype if lost by apply | |
1512 | try: | |
1513 | # Note CRS cannot be preserved in this case as func could refer | |
1514 | # to any column | |
1515 | result = _ensure_geometry(result) | |
1516 | except TypeError: | |
1517 | pass | |
1518 | ||
1401 | 1519 | return result |
1402 | 1520 | |
1403 | 1521 | @property |
1404 | 1522 | def _constructor(self): |
1405 | return GeoDataFrame | |
1523 | return _geodataframe_constructor_with_fallback | |
1524 | ||
1525 | @property | |
1526 | def _constructor_sliced(self): | |
1527 | def _geodataframe_constructor_sliced(*args, **kwargs): | |
1528 | """ | |
1529 | A specialized (Geo)Series constructor which can fall back to a | |
1530 | Series if a certain operation does not produce geometries: | |
1531 | ||
1532 | - We only return a GeoSeries if the data is actually of geometry | |
1533 | dtype (and so we don't try to convert geometry objects such as | |
1534 | the normal GeoSeries(..) constructor does with `_ensure_geometry`). | |
1535 | - When we get here from obtaining a row or column from a | |
1536 | GeoDataFrame, the goal is to only return a GeoSeries for a | |
1537 | geometry column, and not return a GeoSeries for a row that happened | |
1538 | to come from a DataFrame with only geometry dtype columns (and | |
1539 | thus could have a geometry dtype). Therefore, we don't return a | |
1540 | GeoSeries if we are sure we are in a row selection case (by | |
1541 | checking the identity of the index) | |
1542 | """ | |
1543 | srs = pd.Series(*args, **kwargs) | |
1544 | is_row_proxy = srs.index is self.columns | |
1545 | if is_geometry_type(srs) and not is_row_proxy: | |
1546 | srs = GeoSeries(srs) | |
1547 | return srs | |
1548 | ||
1549 | return _geodataframe_constructor_sliced | |
1406 | 1550 | |
1407 | 1551 | def __finalize__(self, other, method=None, **kwargs): |
1408 | 1552 | """propagate metadata from other to self""" |
1424 | 1568 | f"Please ensure this column from the first DataFrame is not " |
1425 | 1569 | f"repeated." |
1426 | 1570 | ) |
1571 | elif method == "unstack": | |
1572 | # unstack adds multiindex columns and reshapes data. | |
1573 | # it never makes sense to retain geometry column | |
1574 | self._geometry_column_name = None | |
1575 | self._crs = None | |
1427 | 1576 | return self |
1428 | 1577 | |
1429 | 1578 | def dissolve( |
1452 | 1601 | aggfunc : function or string, default "first" |
1453 | 1602 | Aggregation function for manipulation of data associated |
1454 | 1603 | with each group. Passed to pandas `groupby.agg` method. |
1604 | Accepted combinations are: | |
1605 | ||
1606 | - function | |
1607 | - string function name | |
1608 | - list of functions and/or function names, e.g. [np.sum, 'mean'] | |
1609 | - dict of axis labels -> functions, function names or list of such. | |
1455 | 1610 | as_index : boolean, default True |
1456 | 1611 | If true, groupby columns become index of result. |
1457 | 1612 | level : int or str or sequence of int or sequence of str, default None |
1509 | 1664 | |
1510 | 1665 | See also |
1511 | 1666 | -------- |
1512 | GeoDataFrame.explode : explode muti-part geometries into single geometries | |
1667 | GeoDataFrame.explode : explode multi-part geometries into single geometries | |
1513 | 1668 | |
1514 | 1669 | """ |
1515 | 1670 | |
1528 | 1683 | # Process non-spatial component |
1529 | 1684 | data = self.drop(labels=self.geometry.name, axis=1) |
1530 | 1685 | aggregated_data = data.groupby(**groupby_kwargs).agg(aggfunc) |
1686 | aggregated_data.columns = aggregated_data.columns.to_flat_index() | |
1531 | 1687 | |
1532 | 1688 | # Process spatial component |
1533 | 1689 | def merge_geometries(block): |
1552 | 1708 | # overrides the pandas native explode method to break up features geometrically |
1553 | 1709 | def explode(self, column=None, ignore_index=False, index_parts=None, **kwargs): |
1554 | 1710 | """ |
1555 | Explode muti-part geometries into multiple single geometries. | |
1711 | Explode multi-part geometries into multiple single geometries. | |
1556 | 1712 | |
1557 | 1713 | Each row containing a multi-part geometry will be split into |
1558 | 1714 | multiple rows with single geometries, thereby increasing the vertical |
1650 | 1806 | ) |
1651 | 1807 | index_parts = True |
1652 | 1808 | |
1653 | df_copy = self.copy() | |
1654 | ||
1655 | level_str = f"level_{df_copy.index.nlevels}" | |
1656 | ||
1657 | if level_str in df_copy.columns: # GH1393 | |
1658 | df_copy = df_copy.rename(columns={level_str: f"__{level_str}"}) | |
1659 | ||
1660 | if index_parts: | |
1661 | exploded_geom = df_copy.geometry.explode(index_parts=True) | |
1662 | exploded_index = exploded_geom.index | |
1663 | exploded_geom = exploded_geom.reset_index(level=-1, drop=True) | |
1664 | else: | |
1665 | exploded_geom = df_copy.geometry.explode(index_parts=True).reset_index( | |
1666 | level=-1, drop=True | |
1667 | ) | |
1668 | exploded_index = exploded_geom.index | |
1669 | ||
1670 | df = ( | |
1671 | df_copy.drop(df_copy._geometry_column_name, axis=1) | |
1672 | .join(exploded_geom) | |
1673 | .__finalize__(self) | |
1674 | ) | |
1809 | exploded_geom = self.geometry.reset_index(drop=True).explode(index_parts=True) | |
1810 | ||
1811 | df = GeoDataFrame( | |
1812 | self.drop(self._geometry_column_name, axis=1).take( | |
1813 | exploded_geom.index.droplevel(-1) | |
1814 | ), | |
1815 | geometry=exploded_geom.values, | |
1816 | ).__finalize__(self) | |
1675 | 1817 | |
1676 | 1818 | if ignore_index: |
1677 | 1819 | df.reset_index(inplace=True, drop=True) |
1678 | 1820 | elif index_parts: |
1679 | 1821 | # reset to MultiIndex, otherwise df index is only first level of |
1680 | 1822 | # exploded GeoSeries index. |
1681 | df.set_index(exploded_index, inplace=True) | |
1682 | df.index.names = list(self.index.names) + [None] | |
1683 | else: | |
1684 | df.set_index(exploded_index, inplace=True) | |
1685 | df.index.names = self.index.names | |
1686 | ||
1687 | if f"__{level_str}" in df.columns: | |
1688 | df = df.rename(columns={f"__{level_str}": level_str}) | |
1689 | ||
1690 | geo_df = df.set_geometry(self._geometry_column_name) | |
1691 | return geo_df | |
1823 | df = df.set_index( | |
1824 | exploded_geom.index.droplevel( | |
1825 | list(range(exploded_geom.index.nlevels - 1)) | |
1826 | ), | |
1827 | append=True, | |
1828 | ) | |
1829 | ||
1830 | return df | |
1692 | 1831 | |
1693 | 1832 | # overrides the pandas astype method to ensure the correct return type |
1694 | 1833 | def astype(self, dtype, copy=True, errors="raise", **kwargs): |
1732 | 1871 | """ |
1733 | 1872 | # Overridden to fix GH1870, that return type is not preserved always |
1734 | 1873 | # (and where it was, geometry col was not) |
1735 | ||
1736 | if not compat.PANDAS_GE_10: | |
1737 | raise NotImplementedError( | |
1738 | "GeoDataFrame.convert_dtypes requires pandas >= 1.0" | |
1739 | ) | |
1740 | 1874 | |
1741 | 1875 | return GeoDataFrame( |
1742 | 1876 | super().convert_dtypes(*args, **kwargs), |
1775 | 1909 | - append: Insert new values to the existing table. |
1776 | 1910 | schema : string, optional |
1777 | 1911 | Specify the schema. If None, use default schema: 'public'. |
1778 | index : bool, default True | |
1912 | index : bool, default False | |
1779 | 1913 | Write DataFrame index as a column. |
1780 | 1914 | Uses *index_label* as the column name in the table. |
1781 | 1915 | index_label : string or sequence, default None |
1817 | 1951 | warnings.warn( |
1818 | 1952 | "'^' operator will be deprecated. Use the 'symmetric_difference' " |
1819 | 1953 | "method instead.", |
1820 | DeprecationWarning, | |
1954 | FutureWarning, | |
1821 | 1955 | stacklevel=2, |
1822 | 1956 | ) |
1823 | 1957 | return self.geometry.symmetric_difference(other) |
1826 | 1960 | """Implement | operator as for builtin set type""" |
1827 | 1961 | warnings.warn( |
1828 | 1962 | "'|' operator will be deprecated. Use the 'union' method instead.", |
1829 | DeprecationWarning, | |
1963 | FutureWarning, | |
1830 | 1964 | stacklevel=2, |
1831 | 1965 | ) |
1832 | 1966 | return self.geometry.union(other) |
1835 | 1969 | """Implement & operator as for builtin set type""" |
1836 | 1970 | warnings.warn( |
1837 | 1971 | "'&' operator will be deprecated. Use the 'intersection' method instead.", |
1838 | DeprecationWarning, | |
1972 | FutureWarning, | |
1839 | 1973 | stacklevel=2, |
1840 | 1974 | ) |
1841 | 1975 | return self.geometry.intersection(other) |
1844 | 1978 | """Implement - operator as for builtin set type""" |
1845 | 1979 | warnings.warn( |
1846 | 1980 | "'-' operator will be deprecated. Use the 'difference' method instead.", |
1847 | DeprecationWarning, | |
1981 | FutureWarning, | |
1848 | 1982 | stacklevel=2, |
1849 | 1983 | ) |
1850 | 1984 | return self.geometry.difference(other) |
2040 | 2174 | |
2041 | 2175 | Notes |
2042 | 2176 | ----- |
2043 | Since this join relies on distances, results will be innaccurate | |
2177 | Since this join relies on distances, results will be inaccurate | |
2044 | 2178 | if your geometries are in a geographic CRS. |
2045 | 2179 | |
2046 | 2180 | Every operation in GeoPandas is planar, i.e. the potential third |
2060 | 2194 | """Clip points, lines, or polygon geometries to the mask extent. |
2061 | 2195 | |
2062 | 2196 | Both layers must be in the same Coordinate Reference System (CRS). |
2063 | The GeoDataFrame will be clipped to the full extent of the `mask` object. | |
2197 | The GeoDataFrame will be clipped to the full extent of the ``mask`` object. | |
2064 | 2198 | |
2065 | 2199 | If there are multiple polygons in mask, data from the GeoDataFrame will be |
2066 | 2200 | clipped to the total boundary of all polygons in mask. |
2067 | 2201 | |
2068 | 2202 | Parameters |
2069 | 2203 | ---------- |
2070 | mask : GeoDataFrame, GeoSeries, (Multi)Polygon | |
2071 | Polygon vector layer used to clip `gdf`. | |
2204 | mask : GeoDataFrame, GeoSeries, (Multi)Polygon, list-like | |
2205 | Polygon vector layer used to clip the GeoDataFrame. | |
2072 | 2206 | The mask's geometry is dissolved into one geometric feature |
2073 | and intersected with `gdf`. | |
2207 | and intersected with GeoDataFrame. | |
2208 | If the mask is list-like with four elements ``(minx, miny, maxx, maxy)``, | |
2209 | ``clip`` will use a faster rectangle clipping | |
2210 | (:meth:`~GeoSeries.clip_by_rect`), possibly leading to slightly different | |
2211 | results. | |
2074 | 2212 | keep_geom_type : boolean, default False |
2075 | 2213 | If True, return only geometries of original type in case of intersection |
2076 | 2214 | resulting in multiple geometry types or GeometryCollections. |
2079 | 2217 | Returns |
2080 | 2218 | ------- |
2081 | 2219 | GeoDataFrame |
2082 | Vector data (points, lines, polygons) from `gdf` clipped to | |
2220 | Vector data (points, lines, polygons) from the GeoDataFrame clipped to | |
2083 | 2221 | polygon boundary from mask. |
2084 | 2222 | |
2085 | 2223 | See also |
2208 | 2346 | |
2209 | 2347 | |
2210 | 2348 | DataFrame.set_geometry = _dataframe_set_geometry |
2349 | ||
2350 | if not compat.PANDAS_GE_11: # i.e. on pandas 1.0.x | |
2351 | _geodataframe_constructor_with_fallback._from_axes = GeoDataFrame._from_axes |
2 | 2 | |
3 | 3 | import numpy as np |
4 | 4 | import pandas as pd |
5 | from pandas import Series, MultiIndex | |
5 | from pandas import Series, MultiIndex, DataFrame | |
6 | 6 | from pandas.core.internals import SingleBlockManager |
7 | 7 | |
8 | 8 | from pyproj import CRS |
27 | 27 | from .base import is_geometry_type |
28 | 28 | |
29 | 29 | |
30 | _SERIES_WARNING_MSG = """\ | |
31 | You are passing non-geometry data to the GeoSeries constructor. Currently, | |
32 | it falls back to returning a pandas Series. But in the future, we will start | |
33 | to raise a TypeError instead.""" | |
34 | ||
35 | ||
36 | 30 | def _geoseries_constructor_with_fallback(data=None, index=None, crs=None, **kwargs): |
37 | 31 | """ |
38 | 32 | A flexible constructor for GeoSeries._constructor, which needs to be able |
40 | 34 | geometries) |
41 | 35 | """ |
42 | 36 | try: |
43 | with warnings.catch_warnings(): | |
44 | warnings.filterwarnings( | |
45 | "ignore", | |
46 | message=_SERIES_WARNING_MSG, | |
47 | category=FutureWarning, | |
48 | module="geopandas[.*]", | |
49 | ) | |
50 | return GeoSeries(data=data, index=index, crs=crs, **kwargs) | |
37 | return GeoSeries(data=data, index=index, crs=crs, **kwargs) | |
51 | 38 | except TypeError: |
52 | 39 | return Series(data=data, index=index, **kwargs) |
40 | ||
41 | ||
42 | def _geoseries_expanddim(data=None, *args, **kwargs): | |
43 | from geopandas import GeoDataFrame | |
44 | ||
45 | # pd.Series._constructor_expanddim == pd.DataFrame | |
46 | df = pd.DataFrame(data, *args, **kwargs) | |
47 | geo_col_name = None | |
48 | if isinstance(data, GeoSeries): | |
49 | # pandas default column name is 0, keep convention | |
50 | geo_col_name = data.name if data.name is not None else 0 | |
51 | ||
52 | if df.shape[1] == 1: | |
53 | geo_col_name = df.columns[0] | |
54 | ||
55 | if (df.dtypes == "geometry").sum() > 0: | |
56 | if geo_col_name is None or not is_geometry_type(df[geo_col_name]): | |
57 | df = GeoDataFrame(df) | |
58 | df._geometry_column_name = None | |
59 | else: | |
60 | df = df.set_geometry(geo_col_name) | |
61 | ||
62 | return df | |
63 | ||
64 | ||
65 | # pd.concat (pandas/core/reshape/concat.py) requires this for the | |
66 | # concatenation of series since pandas 1.1 | |
67 | # (https://github.com/pandas-dev/pandas/commit/f9e4c8c84bcef987973f2624cc2932394c171c8c) | |
68 | _geoseries_expanddim._get_axis_number = DataFrame._get_axis_number | |
53 | 69 | |
54 | 70 | |
55 | 71 | class GeoSeries(GeoPandasBase, Series): |
132 | 148 | |
133 | 149 | _metadata = ["name"] |
134 | 150 | |
135 | def __new__(cls, data=None, index=None, crs=None, **kwargs): | |
136 | # we need to use __new__ because we want to return Series instance | |
137 | # instead of GeoSeries instance in case of non-geometry data | |
138 | ||
151 | def __init__(self, data=None, index=None, crs=None, **kwargs): | |
139 | 152 | if hasattr(data, "crs") and crs: |
140 | 153 | if not data.crs: |
141 | 154 | # make a copy to avoid setting CRS to passed GeometryArray |
142 | 155 | data = data.copy() |
143 | 156 | else: |
144 | 157 | if not data.crs == crs: |
145 | warnings.warn( | |
158 | raise ValueError( | |
146 | 159 | "CRS mismatch between CRS of the passed geometries " |
147 | "and 'crs'. Use 'GeoDataFrame.set_crs(crs, " | |
160 | "and 'crs'. Use 'GeoSeries.set_crs(crs, " | |
148 | 161 | "allow_override=True)' to overwrite CRS or " |
149 | 162 | "'GeoSeries.to_crs(crs)' to reproject geometries. " |
150 | "CRS mismatch will raise an error in the future versions " | |
151 | "of GeoPandas.", | |
152 | FutureWarning, | |
153 | stacklevel=2, | |
154 | 163 | ) |
155 | # TODO: raise error in 0.9 or 0.10. | |
156 | 164 | |
157 | 165 | if isinstance(data, SingleBlockManager): |
158 | 166 | if isinstance(data.blocks[0].dtype, GeometryDtype): |
167 | 175 | values = data.blocks[0].values |
168 | 176 | block = ExtensionBlock(values, slice(0, len(values), 1), ndim=1) |
169 | 177 | data = SingleBlockManager([block], data.axes[0], fastpath=True) |
170 | self = super(GeoSeries, cls).__new__(cls) | |
171 | super(GeoSeries, self).__init__(data, index=index, **kwargs) | |
172 | self.crs = getattr(self.values, "crs", crs) | |
173 | return self | |
174 | warnings.warn(_SERIES_WARNING_MSG, FutureWarning, stacklevel=2) | |
175 | return Series(data, index=index, **kwargs) | |
178 | else: | |
179 | raise TypeError( | |
180 | "Non geometry data passed to GeoSeries constructor, " | |
181 | f"received data of dtype '{data.blocks[0].dtype}'" | |
182 | ) | |
176 | 183 | |
177 | 184 | if isinstance(data, BaseGeometry): |
178 | 185 | # fix problem for scalar geometries passed, ensure the list of |
202 | 209 | # pd.Series with empty data gives float64 for older pandas versions |
203 | 210 | s = s.astype(object) |
204 | 211 | else: |
205 | warnings.warn(_SERIES_WARNING_MSG, FutureWarning, stacklevel=2) | |
206 | return s | |
212 | raise TypeError( | |
213 | "Non geometry data passed to GeoSeries constructor, " | |
214 | f"received data of dtype '{s.dtype}'" | |
215 | ) | |
207 | 216 | # try to convert to GeometryArray, if fails return plain Series |
208 | 217 | try: |
209 | 218 | data = from_shapely(s.values, crs) |
210 | 219 | except TypeError: |
211 | warnings.warn(_SERIES_WARNING_MSG, FutureWarning, stacklevel=2) | |
212 | return s | |
220 | raise TypeError( | |
221 | "Non geometry data passed to GeoSeries constructor, " | |
222 | f"received data of dtype '{s.dtype}'" | |
223 | ) | |
213 | 224 | index = s.index |
214 | 225 | name = s.name |
215 | 226 | |
216 | self = super(GeoSeries, cls).__new__(cls) | |
217 | super(GeoSeries, self).__init__(data, index=index, name=name, **kwargs) | |
218 | ||
227 | super().__init__(data, index=index, name=name, **kwargs) | |
219 | 228 | if not self.crs: |
220 | 229 | self.crs = crs |
221 | return self | |
222 | ||
223 | def __init__(self, *args, **kwargs): | |
224 | # need to overwrite Series init to prevent calling it for GeoSeries | |
225 | # (doesn't know crs, all work is already done above) | |
226 | pass | |
227 | 230 | |
228 | 231 | def append(self, *args, **kwargs): |
229 | 232 | return self._wrapped_pandas_method("append", *args, **kwargs) |
541 | 544 | Parameters |
542 | 545 | ---------- |
543 | 546 | filename : string |
544 | File path or file handle to write to. | |
547 | File path or file handle to write to. The path may specify a | |
548 | GDAL VSI scheme. | |
545 | 549 | driver : string, default None |
546 | 550 | The OGR format driver used to write the vector file. |
547 | 551 | If not specified, it attempts to infer it from the file extension. |
554 | 558 | |
555 | 559 | .. versionadded:: 0.7 |
556 | 560 | Previously the index was not written. |
557 | ||
558 | Notes | |
559 | ----- | |
560 | The extra keyword arguments ``**kwargs`` are passed to fiona.open and | |
561 | can be used to write to multi-layer data, store data within archives | |
562 | (zip files), etc. | |
561 | mode : string, default 'w' | |
562 | The write mode, 'w' to overwrite the existing file and 'a' to append. | |
563 | Not all drivers support appending. The drivers that support appending | |
564 | are listed in fiona.supported_drivers or | |
565 | https://github.com/Toblerity/Fiona/blob/master/fiona/drvsupport.py | |
566 | crs : pyproj.CRS, default None | |
567 | If specified, the CRS is passed to Fiona to | |
568 | better control how the file is written. If None, GeoPandas | |
569 | will determine the crs based on crs df attribute. | |
570 | The value can be anything accepted | |
571 | by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`, | |
572 | such as an authority string (eg "EPSG:4326") or a WKT string. | |
573 | engine : str, "fiona" or "pyogrio" | |
574 | The underlying library that is used to write the file. Currently, the | |
575 | supported options are "fiona" and "pyogrio". Defaults to "fiona" if | |
576 | installed, otherwise tries "pyogrio". | |
577 | **kwargs : | |
578 | Keyword args to be passed to the engine, and can be used to write | |
579 | to multi-layer data, store data within archives (zip files), etc. | |
580 | In case of the "fiona" engine, the keyword arguments are passed to | |
581 | fiona.open`. For more information on possible keywords, type: | |
582 | ``import fiona; help(fiona.open)``. In case of the "pyogrio" engine, | |
583 | the keyword arguments are passed to `pyogrio.write_dataframe`. | |
563 | 584 | |
564 | 585 | See Also |
565 | 586 | -------- |
591 | 612 | |
592 | 613 | @property |
593 | 614 | def _constructor_expanddim(self): |
594 | from geopandas import GeoDataFrame | |
595 | ||
596 | return GeoDataFrame | |
615 | return _geoseries_expanddim | |
597 | 616 | |
598 | 617 | def _wrapped_pandas_method(self, mtd, *args, **kwargs): |
599 | 618 | """Wrap a generic pandas method to ensure it returns a GeoSeries""" |
625 | 644 | if self.crs is not None: |
626 | 645 | result.set_crs(self.crs, inplace=True) |
627 | 646 | return result |
628 | ||
629 | def __finalize__(self, other, method=None, **kwargs): | |
630 | """propagate metadata from other to self""" | |
631 | # NOTE: backported from pandas master (upcoming v0.13) | |
632 | for name in self._metadata: | |
633 | object.__setattr__(self, name, getattr(other, name, None)) | |
634 | return self | |
635 | 647 | |
636 | 648 | def isna(self): |
637 | 649 | """ |
671 | 683 | GeoSeries.notna : inverse of isna |
672 | 684 | GeoSeries.is_empty : detect empty geometries |
673 | 685 | """ |
674 | if self.is_empty.any(): | |
675 | warnings.warn( | |
676 | "GeoSeries.isna() previously returned True for both missing (None) " | |
677 | "and empty geometries. Now, it only returns True for missing values. " | |
678 | "Since the calling GeoSeries contains empty geometries, the result " | |
679 | "has changed compared to previous versions of GeoPandas.\n" | |
680 | "Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get " | |
681 | "back the old behaviour.\n\n" | |
682 | "To further ignore this warning, you can do: \n" | |
683 | "import warnings; warnings.filterwarnings('ignore', 'GeoSeries.isna', " | |
684 | "UserWarning)", | |
685 | UserWarning, | |
686 | stacklevel=2, | |
687 | ) | |
688 | ||
689 | 686 | return super().isna() |
690 | 687 | |
691 | 688 | def isnull(self): |
919 | 916 | |
920 | 917 | index = [] |
921 | 918 | geometries = [] |
922 | for idx, s in self.geometry.iteritems(): | |
919 | for idx, s in self.geometry.items(): | |
923 | 920 | if s.type.startswith("Multi") or s.type == "GeometryCollection": |
924 | 921 | geoms = s.geoms |
925 | 922 | idxs = [(idx, i) for i in range(len(geoms))] |
1265 | 1262 | warnings.warn( |
1266 | 1263 | "'^' operator will be deprecated. Use the 'symmetric_difference' " |
1267 | 1264 | "method instead.", |
1268 | DeprecationWarning, | |
1265 | FutureWarning, | |
1269 | 1266 | stacklevel=2, |
1270 | 1267 | ) |
1271 | 1268 | return self.symmetric_difference(other) |
1274 | 1271 | """Implement | operator as for builtin set type""" |
1275 | 1272 | warnings.warn( |
1276 | 1273 | "'|' operator will be deprecated. Use the 'union' method instead.", |
1277 | DeprecationWarning, | |
1274 | FutureWarning, | |
1278 | 1275 | stacklevel=2, |
1279 | 1276 | ) |
1280 | 1277 | return self.union(other) |
1283 | 1280 | """Implement & operator as for builtin set type""" |
1284 | 1281 | warnings.warn( |
1285 | 1282 | "'&' operator will be deprecated. Use the 'intersection' method instead.", |
1286 | DeprecationWarning, | |
1283 | FutureWarning, | |
1287 | 1284 | stacklevel=2, |
1288 | 1285 | ) |
1289 | 1286 | return self.intersection(other) |
1292 | 1289 | """Implement - operator as for builtin set type""" |
1293 | 1290 | warnings.warn( |
1294 | 1291 | "'-' operator will be deprecated. Use the 'difference' method instead.", |
1295 | DeprecationWarning, | |
1292 | FutureWarning, | |
1296 | 1293 | stacklevel=2, |
1297 | 1294 | ) |
1298 | 1295 | return self.difference(other) |
1308 | 1305 | |
1309 | 1306 | Parameters |
1310 | 1307 | ---------- |
1311 | mask : GeoDataFrame, GeoSeries, (Multi)Polygon | |
1308 | mask : GeoDataFrame, GeoSeries, (Multi)Polygon, list-like | |
1312 | 1309 | Polygon vector layer used to clip `gdf`. |
1313 | 1310 | The mask's geometry is dissolved into one geometric feature |
1314 | and intersected with `gdf`. | |
1311 | and intersected with GeoSeries. | |
1312 | If the mask is list-like with four elements ``(minx, miny, maxx, maxy)``, | |
1313 | ``clip`` will use a faster rectangle clipping | |
1314 | (:meth:`~GeoSeries.clip_by_rect`), possibly leading to slightly different | |
1315 | results. | |
1315 | 1316 | keep_geom_type : boolean, default False |
1316 | 1317 | If True, return only geometries of original type in case of intersection |
1317 | 1318 | resulting in multiple geometry types or GeometryCollections. |
0 | from distutils.version import LooseVersion | |
0 | from packaging.version import Version | |
1 | 1 | import json |
2 | 2 | import warnings |
3 | 3 | |
4 | from pandas import DataFrame | |
4 | from pandas import DataFrame, Series | |
5 | 5 | |
6 | 6 | from geopandas._compat import import_optional_dependency |
7 | 7 | from geopandas.array import from_wkb |
9 | 9 | import geopandas |
10 | 10 | from .file import _expand_user |
11 | 11 | |
12 | METADATA_VERSION = "0.1.0" | |
13 | # reference: https://github.com/geopandas/geo-arrow-spec | |
12 | METADATA_VERSION = "0.4.0" | |
13 | SUPPORTED_VERSIONS = ["0.1.0", "0.4.0"] | |
14 | # reference: https://github.com/opengeospatial/geoparquet | |
14 | 15 | |
15 | 16 | # Metadata structure: |
16 | 17 | # { |
17 | 18 | # "geo": { |
18 | 19 | # "columns": { |
19 | 20 | # "<name>": { |
20 | # "crs": "<WKT or None: REQUIRED>", | |
21 | 21 | # "encoding": "WKB" |
22 | # "geometry_type": <str or list of str: REQUIRED> | |
23 | # "crs": "<PROJJSON or None: OPTIONAL>", | |
24 | # "orientation": "<'counterclockwise' or None: OPTIONAL>" | |
25 | # "edges": "planar" | |
26 | # "bbox": <list of [xmin, ymin, xmax, ymax]: OPTIONAL> | |
27 | # "epoch": <float: OPTIONAL> | |
22 | 28 | # } |
23 | 29 | # }, |
30 | # "primary_column": "<str: REQUIRED>", | |
31 | # "version": "<METADATA_VERSION>", | |
32 | # | |
33 | # # Additional GeoPandas specific metadata (not in metadata spec) | |
24 | 34 | # "creator": { |
25 | 35 | # "library": "geopandas", |
26 | 36 | # "version": "<geopandas.__version__>" |
27 | 37 | # } |
28 | # "primary_column": "<str: REQUIRED>", | |
29 | # "schema_version": "<METADATA_VERSION>" | |
30 | 38 | # } |
31 | 39 | # } |
32 | 40 | |
39 | 47 | ) |
40 | 48 | |
41 | 49 | |
42 | def _create_metadata(df): | |
50 | def _remove_id_from_member_of_ensembles(json_dict): | |
51 | """ | |
52 | Older PROJ versions will not recognize IDs of datum ensemble members that | |
53 | were added in more recent PROJ database versions. | |
54 | ||
55 | Cf https://github.com/opengeospatial/geoparquet/discussions/110 | |
56 | and https://github.com/OSGeo/PROJ/pull/3221 | |
57 | ||
58 | Mimicking the patch to GDAL from https://github.com/OSGeo/gdal/pull/5872 | |
59 | """ | |
60 | for key, value in json_dict.items(): | |
61 | if isinstance(value, dict): | |
62 | _remove_id_from_member_of_ensembles(value) | |
63 | elif key == "members" and isinstance(value, list): | |
64 | for member in value: | |
65 | member.pop("id", None) | |
66 | ||
67 | ||
68 | def _create_metadata(df, version=None): | |
43 | 69 | """Create and encode geo metadata dict. |
44 | 70 | |
45 | 71 | Parameters |
46 | 72 | ---------- |
47 | 73 | df : GeoDataFrame |
74 | version : {'0.1.0', '0.4.0', None} | |
75 | GeoParquet specification version; if not provided will default to | |
76 | latest supported version. | |
48 | 77 | |
49 | 78 | Returns |
50 | 79 | ------- |
51 | 80 | dict |
52 | 81 | """ |
82 | ||
83 | version = version or METADATA_VERSION | |
84 | ||
85 | if version not in SUPPORTED_VERSIONS: | |
86 | raise ValueError(f"version must be one of: {', '.join(SUPPORTED_VERSIONS)}") | |
53 | 87 | |
54 | 88 | # Construct metadata for each geometry |
55 | 89 | column_metadata = {} |
56 | 90 | for col in df.columns[df.dtypes == "geometry"]: |
57 | 91 | series = df[col] |
92 | geometry_types = sorted(Series(series.geom_type.unique()).dropna()) | |
93 | ||
94 | crs = None | |
95 | if series.crs: | |
96 | if version == "0.1.0": | |
97 | crs = series.crs.to_wkt() | |
98 | else: # version >= 0.4.0 | |
99 | crs = series.crs.to_json_dict() | |
100 | _remove_id_from_member_of_ensembles(crs) | |
101 | ||
58 | 102 | column_metadata[col] = { |
59 | "crs": series.crs.to_wkt() if series.crs else None, | |
60 | 103 | "encoding": "WKB", |
104 | "crs": crs, | |
105 | "geometry_type": geometry_types[0] | |
106 | if len(geometry_types) == 1 | |
107 | else geometry_types, | |
61 | 108 | "bbox": series.total_bounds.tolist(), |
62 | 109 | } |
63 | 110 | |
64 | 111 | return { |
65 | 112 | "primary_column": df._geometry_column_name, |
66 | 113 | "columns": column_metadata, |
67 | "schema_version": METADATA_VERSION, | |
114 | "version": METADATA_VERSION, | |
68 | 115 | "creator": {"library": "geopandas", "version": geopandas.__version__}, |
69 | 116 | } |
70 | 117 | |
141 | 188 | |
142 | 189 | if not metadata: |
143 | 190 | raise ValueError("Missing or malformed geo metadata in Parquet/Feather file") |
191 | ||
192 | # version was schema_version in 0.1.0 | |
193 | version = metadata.get("version", metadata.get("schema_version")) | |
194 | if not version: | |
195 | raise ValueError( | |
196 | "'geo' metadata in Parquet/Feather file is missing required key: " | |
197 | "'version'" | |
198 | ) | |
144 | 199 | |
145 | 200 | required_keys = ("primary_column", "columns") |
146 | 201 | for key in required_keys: |
154 | 209 | raise ValueError("'columns' in 'geo' metadata must be a dict") |
155 | 210 | |
156 | 211 | # Validate that geometry columns have required metadata and values |
157 | required_col_keys = ("crs", "encoding") | |
212 | # leaving out "geometry_type" for compatibility with 0.1 | |
213 | required_col_keys = ("encoding",) | |
158 | 214 | for col, column_metadata in metadata["columns"].items(): |
159 | 215 | for key in required_col_keys: |
160 | 216 | if key not in column_metadata: |
167 | 223 | raise ValueError("Only WKB geometry encoding is supported") |
168 | 224 | |
169 | 225 | |
170 | def _geopandas_to_arrow(df, index=None): | |
226 | def _geopandas_to_arrow(df, index=None, version=None): | |
171 | 227 | """ |
172 | 228 | Helper function with main, shared logic for to_parquet/to_feather. |
173 | 229 | """ |
174 | 230 | from pyarrow import Table |
175 | 231 | |
176 | warnings.warn( | |
177 | "this is an initial implementation of Parquet/Feather file support and " | |
178 | "associated metadata. This is tracking version 0.1.0 of the metadata " | |
179 | "specification at " | |
180 | "https://github.com/geopandas/geo-arrow-spec\n\n" | |
181 | "This metadata specification does not yet make stability promises. " | |
182 | "We do not yet recommend using this in a production setting unless you " | |
183 | "are able to rewrite your Parquet/Feather files.\n\n" | |
184 | "To further ignore this warning, you can do: \n" | |
185 | "import warnings; warnings.filterwarnings('ignore', " | |
186 | "message='.*initial implementation of Parquet.*')", | |
187 | UserWarning, | |
188 | stacklevel=4, | |
189 | ) | |
190 | ||
191 | 232 | _validate_dataframe(df) |
192 | 233 | |
193 | 234 | # create geo metadata before altering incoming data frame |
194 | geo_metadata = _create_metadata(df) | |
235 | geo_metadata = _create_metadata(df, version=version) | |
195 | 236 | |
196 | 237 | df = df.to_wkb() |
197 | 238 | |
204 | 245 | return table.replace_schema_metadata(metadata) |
205 | 246 | |
206 | 247 | |
207 | def _to_parquet(df, path, index=None, compression="snappy", **kwargs): | |
248 | def _to_parquet(df, path, index=None, compression="snappy", version=None, **kwargs): | |
208 | 249 | """ |
209 | 250 | Write a GeoDataFrame to the Parquet format. |
210 | 251 | |
212 | 253 | |
213 | 254 | Requires 'pyarrow'. |
214 | 255 | |
215 | WARNING: this is an initial implementation of Parquet file support and | |
216 | associated metadata. This is tracking version 0.1.0 of the metadata | |
217 | specification at: | |
218 | https://github.com/geopandas/geo-arrow-spec | |
219 | ||
220 | This metadata specification does not yet make stability promises. As such, | |
221 | we do not yet recommend using this in a production setting unless you are | |
222 | able to rewrite your Parquet files. | |
223 | ||
256 | This is tracking version 0.4.0 of the GeoParquet specification at: | |
257 | https://github.com/opengeospatial/geoparquet | |
224 | 258 | |
225 | 259 | .. versionadded:: 0.8 |
226 | 260 | |
235 | 269 | output except `RangeIndex` which is stored as metadata only. |
236 | 270 | compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' |
237 | 271 | Name of the compression to use. Use ``None`` for no compression. |
272 | version : {'0.1.0', '0.4.0', None} | |
273 | GeoParquet specification version; if not provided will default to | |
274 | latest supported version. | |
238 | 275 | kwargs |
239 | 276 | Additional keyword arguments passed to pyarrow.parquet.write_table(). |
240 | 277 | """ |
243 | 280 | ) |
244 | 281 | |
245 | 282 | path = _expand_user(path) |
246 | table = _geopandas_to_arrow(df, index=index) | |
283 | table = _geopandas_to_arrow(df, index=index, version=version) | |
247 | 284 | parquet.write_table(table, path, compression=compression, **kwargs) |
248 | 285 | |
249 | 286 | |
250 | def _to_feather(df, path, index=None, compression=None, **kwargs): | |
287 | def _to_feather(df, path, index=None, compression=None, version=None, **kwargs): | |
251 | 288 | """ |
252 | 289 | Write a GeoDataFrame to the Feather format. |
253 | 290 | |
255 | 292 | |
256 | 293 | Requires 'pyarrow' >= 0.17. |
257 | 294 | |
258 | WARNING: this is an initial implementation of Feather file support and | |
259 | associated metadata. This is tracking version 0.1.0 of the metadata | |
260 | specification at: | |
261 | https://github.com/geopandas/geo-arrow-spec | |
262 | ||
263 | This metadata specification does not yet make stability promises. As such, | |
264 | we do not yet recommend using this in a production setting unless you are | |
265 | able to rewrite your Feather files. | |
295 | This is tracking version 0.4.0 of the GeoParquet specification at: | |
296 | https://github.com/opengeospatial/geoparquet | |
266 | 297 | |
267 | 298 | .. versionadded:: 0.8 |
268 | 299 | |
278 | 309 | compression : {'zstd', 'lz4', 'uncompressed'}, optional |
279 | 310 | Name of the compression to use. Use ``"uncompressed"`` for no |
280 | 311 | compression. By default uses LZ4 if available, otherwise uncompressed. |
312 | version : {'0.1.0', '0.4.0', None} | |
313 | GeoParquet specification version; if not provided will default to | |
314 | latest supported version. | |
281 | 315 | kwargs |
282 | 316 | Additional keyword arguments passed to pyarrow.feather.write_feather(). |
283 | 317 | """ |
287 | 321 | # TODO move this into `import_optional_dependency` |
288 | 322 | import pyarrow |
289 | 323 | |
290 | if pyarrow.__version__ < LooseVersion("0.17.0"): | |
324 | if Version(pyarrow.__version__) < Version("0.17.0"): | |
291 | 325 | raise ImportError("pyarrow >= 0.17 required for Feather support") |
292 | 326 | |
293 | 327 | path = _expand_user(path) |
294 | table = _geopandas_to_arrow(df, index=index) | |
328 | table = _geopandas_to_arrow(df, index=index, version=version) | |
295 | 329 | feather.write_feather(table, path, compression=compression, **kwargs) |
296 | 330 | |
297 | 331 | |
298 | def _arrow_to_geopandas(table): | |
332 | def _arrow_to_geopandas(table, metadata=None): | |
299 | 333 | """ |
300 | 334 | Helper function with main, shared logic for read_parquet/read_feather. |
301 | 335 | """ |
302 | 336 | df = table.to_pandas() |
303 | 337 | |
304 | metadata = table.schema.metadata | |
338 | metadata = metadata or table.schema.metadata | |
305 | 339 | if metadata is None or b"geo" not in metadata: |
306 | 340 | raise ValueError( |
307 | 341 | """Missing geo metadata in Parquet/Feather file. |
343 | 377 | |
344 | 378 | # Convert the WKB columns that are present back to geometry. |
345 | 379 | for col in geometry_columns: |
346 | df[col] = from_wkb(df[col].values, crs=metadata["columns"][col]["crs"]) | |
380 | col_metadata = metadata["columns"][col] | |
381 | if "crs" in col_metadata: | |
382 | crs = col_metadata["crs"] | |
383 | if isinstance(crs, dict): | |
384 | _remove_id_from_member_of_ensembles(crs) | |
385 | else: | |
386 | # per the GeoParquet spec, missing CRS is to be interpreted as | |
387 | # OGC:CRS84 | |
388 | crs = "OGC:CRS84" | |
389 | ||
390 | df[col] = from_wkb(df[col].values, crs=crs) | |
347 | 391 | |
348 | 392 | return GeoDataFrame(df, geometry=geometry) |
349 | 393 | |
360 | 404 | isinstance(path, str) |
361 | 405 | and storage_options is None |
362 | 406 | and filesystem is None |
363 | and LooseVersion(pyarrow.__version__) >= "5.0.0" | |
407 | and Version(pyarrow.__version__) >= Version("5.0.0") | |
364 | 408 | ): |
365 | 409 | # Use the native pyarrow filesystem if possible. |
366 | 410 | try: |
386 | 430 | return filesystem, path |
387 | 431 | |
388 | 432 | |
433 | def _ensure_arrow_fs(filesystem): | |
434 | """ | |
435 | Simplified version of pyarrow.fs._ensure_filesystem. This is only needed | |
436 | below because `pyarrow.parquet.read_metadata` does not yet accept a | |
437 | filesystem keyword (https://issues.apache.org/jira/browse/ARROW-16719) | |
438 | """ | |
439 | from pyarrow import fs | |
440 | ||
441 | if isinstance(filesystem, fs.FileSystem): | |
442 | return filesystem | |
443 | ||
444 | # handle fsspec-compatible filesystems | |
445 | try: | |
446 | import fsspec | |
447 | except ImportError: | |
448 | pass | |
449 | else: | |
450 | if isinstance(filesystem, fsspec.AbstractFileSystem): | |
451 | return fs.PyFileSystem(fs.FSSpecHandler(filesystem)) | |
452 | ||
453 | return filesystem | |
454 | ||
455 | ||
389 | 456 | def _read_parquet(path, columns=None, storage_options=None, **kwargs): |
390 | 457 | """ |
391 | 458 | Load a Parquet object from the file path, returning a GeoDataFrame. |
399 | 466 | * if the primary geometry column saved to this file is not included in |
400 | 467 | columns, the first available geometry column will be set as the geometry |
401 | 468 | column of the returned GeoDataFrame. |
469 | ||
470 | Supports versions 0.1.0, 0.4.0 of the GeoParquet specification at: | |
471 | https://github.com/opengeospatial/geoparquet | |
472 | ||
473 | If 'crs' key is not present in the GeoParquet metadata associated with the | |
474 | Parquet object, it will default to "OGC:CRS84" according to the specification. | |
402 | 475 | |
403 | 476 | Requires 'pyarrow'. |
404 | 477 | |
457 | 530 | kwargs["use_pandas_metadata"] = True |
458 | 531 | table = parquet.read_table(path, columns=columns, filesystem=filesystem, **kwargs) |
459 | 532 | |
460 | return _arrow_to_geopandas(table) | |
533 | # read metadata separately to get the raw Parquet FileMetaData metadata | |
534 | # (pyarrow doesn't properly exposes those in schema.metadata for files | |
535 | # created by GDAL - https://issues.apache.org/jira/browse/ARROW-16688) | |
536 | metadata = None | |
537 | if table.schema.metadata is None or b"geo" not in table.schema.metadata: | |
538 | try: | |
539 | # read_metadata does not accept a filesystem keyword, so need to | |
540 | # handle this manually (https://issues.apache.org/jira/browse/ARROW-16719) | |
541 | if filesystem is not None: | |
542 | pa_filesystem = _ensure_arrow_fs(filesystem) | |
543 | with pa_filesystem.open_input_file(path) as source: | |
544 | metadata = parquet.read_metadata(source).metadata | |
545 | else: | |
546 | metadata = parquet.read_metadata(path).metadata | |
547 | except Exception: | |
548 | pass | |
549 | ||
550 | return _arrow_to_geopandas(table, metadata) | |
461 | 551 | |
462 | 552 | |
463 | 553 | def _read_feather(path, columns=None, **kwargs): |
473 | 563 | * if the primary geometry column saved to this file is not included in |
474 | 564 | columns, the first available geometry column will be set as the geometry |
475 | 565 | column of the returned GeoDataFrame. |
566 | ||
567 | Supports versions 0.1.0, 0.4.0 of the GeoParquet specification at: | |
568 | https://github.com/opengeospatial/geoparquet | |
569 | ||
570 | If 'crs' key is not present in the Feather metadata associated with the | |
571 | Parquet object, it will default to "OGC:CRS84" according to the specification. | |
476 | 572 | |
477 | 573 | Requires 'pyarrow' >= 0.17. |
478 | 574 | |
512 | 608 | # TODO move this into `import_optional_dependency` |
513 | 609 | import pyarrow |
514 | 610 | |
515 | if pyarrow.__version__ < LooseVersion("0.17.0"): | |
611 | if Version(pyarrow.__version__) < Version("0.17.0"): | |
516 | 612 | raise ImportError("pyarrow >= 0.17 required for Feather support") |
517 | 613 | |
518 | 614 | path = _expand_user(path) |
0 | 0 | import os |
1 | from distutils.version import LooseVersion | |
1 | from packaging.version import Version | |
2 | 2 | from pathlib import Path |
3 | 3 | import warnings |
4 | 4 | |
5 | 5 | import numpy as np |
6 | 6 | import pandas as pd |
7 | from pandas.api.types import is_integer_dtype | |
7 | 8 | |
8 | 9 | import pyproj |
9 | 10 | from shapely.geometry import mapping |
10 | 11 | from shapely.geometry.base import BaseGeometry |
11 | 12 | |
12 | try: | |
13 | import fiona | |
14 | ||
15 | fiona_import_error = None | |
16 | ||
17 | # only try to import fiona.Env if the main fiona import succeeded (otherwise you | |
18 | # can get confusing "AttributeError: module 'fiona' has no attribute '_loading'" | |
19 | # / partially initialized module errors) | |
20 | try: | |
21 | from fiona import Env as fiona_env | |
22 | except ImportError: | |
23 | try: | |
24 | from fiona import drivers as fiona_env | |
25 | except ImportError: | |
26 | fiona_env = None | |
27 | ||
28 | except ImportError as err: | |
29 | fiona = None | |
30 | fiona_import_error = str(err) | |
31 | ||
32 | ||
33 | 13 | from geopandas import GeoDataFrame, GeoSeries |
34 | ||
35 | 14 | |
36 | 15 | # Adapted from pandas.io.common |
37 | 16 | from urllib.request import urlopen as _urlopen |
41 | 20 | |
42 | 21 | _VALID_URLS = set(uses_relative + uses_netloc + uses_params) |
43 | 22 | _VALID_URLS.discard("") |
23 | ||
24 | ||
25 | fiona = None | |
26 | fiona_env = None | |
27 | fiona_import_error = None | |
28 | ||
29 | ||
30 | def _import_fiona(): | |
31 | global fiona | |
32 | global fiona_env | |
33 | global fiona_import_error | |
34 | ||
35 | if fiona is None: | |
36 | try: | |
37 | import fiona | |
38 | ||
39 | # only try to import fiona.Env if the main fiona import succeeded | |
40 | # (otherwise you can get confusing "AttributeError: module 'fiona' | |
41 | # has no attribute '_loading'" / partially initialized module errors) | |
42 | try: | |
43 | from fiona import Env as fiona_env | |
44 | except ImportError: | |
45 | try: | |
46 | from fiona import drivers as fiona_env | |
47 | except ImportError: | |
48 | fiona_env = None | |
49 | ||
50 | except ImportError as err: | |
51 | fiona = False | |
52 | fiona_import_error = str(err) | |
53 | ||
54 | ||
55 | pyogrio = None | |
56 | pyogrio_import_error = None | |
57 | ||
58 | ||
59 | def _import_pyogrio(): | |
60 | global pyogrio | |
61 | global pyogrio_import_error | |
62 | ||
63 | if pyogrio is None: | |
64 | try: | |
65 | import pyogrio | |
66 | except ImportError as err: | |
67 | pyogrio = False | |
68 | pyogrio_import_error = str(err) | |
69 | ||
70 | ||
71 | def _check_fiona(func): | |
72 | if fiona is None: | |
73 | raise ImportError( | |
74 | f"the {func} requires the 'fiona' package, but it is not installed or does " | |
75 | f"not import correctly.\nImporting fiona resulted in: {fiona_import_error}" | |
76 | ) | |
77 | ||
78 | ||
79 | def _check_pyogrio(func): | |
80 | if pyogrio is None: | |
81 | raise ImportError( | |
82 | f"the {func} requires the 'pyogrio' package, but it is not installed " | |
83 | "or does not import correctly." | |
84 | "\nImporting pyogrio resulted in: {pyogrio_import_error}" | |
85 | ) | |
86 | ||
87 | ||
88 | def _check_engine(engine, func): | |
89 | # default to "fiona" if installed, otherwise try pyogrio | |
90 | if engine is None: | |
91 | _import_fiona() | |
92 | if fiona: | |
93 | engine = "fiona" | |
94 | else: | |
95 | _import_pyogrio() | |
96 | if pyogrio: | |
97 | engine = "pyogrio" | |
98 | ||
99 | if engine == "fiona": | |
100 | _import_fiona() | |
101 | _check_fiona(func) | |
102 | elif engine == "pyogrio": | |
103 | _import_pyogrio() | |
104 | _check_pyogrio(func) | |
105 | elif engine is None: | |
106 | raise ImportError( | |
107 | f"The {func} requires the 'pyogrio' or 'fiona' package, " | |
108 | "but neither is installed or imports correctly." | |
109 | f"\nImporting fiona resulted in: {fiona_import_error}" | |
110 | f"\nImporting pyogrio resulted in: {pyogrio_import_error}" | |
111 | ) | |
112 | ||
113 | return engine | |
114 | ||
44 | 115 | |
45 | 116 | _EXTENSION_TO_DRIVER = { |
46 | 117 | ".bna": "BNA", |
74 | 145 | return path |
75 | 146 | |
76 | 147 | |
77 | def _check_fiona(func): | |
78 | if fiona is None: | |
79 | raise ImportError( | |
80 | f"the {func} requires the 'fiona' package, but it is not installed or does " | |
81 | f"not import correctly.\nImporting fiona resulted in: {fiona_import_error}" | |
82 | ) | |
83 | ||
84 | ||
85 | 148 | def _is_url(url): |
86 | 149 | """Check to see if *url* has a valid protocol.""" |
87 | 150 | try: |
100 | 163 | ) |
101 | 164 | |
102 | 165 | |
103 | def _read_file(filename, bbox=None, mask=None, rows=None, **kwargs): | |
166 | def _read_file(filename, bbox=None, mask=None, rows=None, engine=None, **kwargs): | |
104 | 167 | """ |
105 | 168 | Returns a GeoDataFrame from a file or URL. |
106 | 169 | |
113 | 176 | be opened, or any object with a read() method (such as an open file |
114 | 177 | or StringIO) |
115 | 178 | bbox : tuple | GeoDataFrame or GeoSeries | shapely Geometry, default None |
116 | Filter features by given bounding box, GeoSeries, GeoDataFrame or a | |
117 | shapely geometry. CRS mis-matches are resolved if given a GeoSeries | |
118 | or GeoDataFrame. Tuple is (minx, miny, maxx, maxy) to match the | |
119 | bounds property of shapely geometry objects. Cannot be used with mask. | |
179 | Filter features by given bounding box, GeoSeries, GeoDataFrame or a shapely | |
180 | geometry. With engine="fiona", CRS mis-matches are resolved if given a GeoSeries | |
181 | or GeoDataFrame. With engine="pyogrio", bbox must be in the same CRS as the | |
182 | dataset. Tuple is (minx, miny, maxx, maxy) to match the bounds property of | |
183 | shapely geometry objects. Cannot be used with mask. | |
120 | 184 | mask : dict | GeoDataFrame or GeoSeries | shapely Geometry, default None |
121 | 185 | Filter for features that intersect with the given dict-like geojson |
122 | 186 | geometry, GeoSeries, GeoDataFrame or shapely geometry. |
125 | 189 | rows : int or slice, default None |
126 | 190 | Load in specific rows by passing an integer (first `n` rows) or a |
127 | 191 | slice() object. |
192 | engine : str, "fiona" or "pyogrio" | |
193 | The underlying library that is used to read the file. Currently, the | |
194 | supported options are "fiona" and "pyogrio". Defaults to "fiona" if | |
195 | installed, otherwise tries "pyogrio". | |
128 | 196 | **kwargs : |
129 | Keyword args to be passed to the `open` or `BytesCollection` method | |
130 | in the fiona library when opening the file. For more information on | |
131 | possible keywords, type: | |
132 | ``import fiona; help(fiona.open)`` | |
197 | Keyword args to be passed to the engine. In case of the "fiona" engine, | |
198 | the keyword arguments are passed to the `open` or `BytesCollection` | |
199 | method in the fiona library when opening the file. For more information | |
200 | on possible keywords, type: ``import fiona; help(fiona.open)``. In | |
201 | case of the "pyogrio" engine, the keyword arguments are passed to | |
202 | `pyogrio.read_dataframe`. | |
133 | 203 | |
134 | 204 | Examples |
135 | 205 | -------- |
162 | 232 | may fail. In this case, the proper encoding can be specified explicitly |
163 | 233 | by using the encoding keyword parameter, e.g. ``encoding='utf-8'``. |
164 | 234 | """ |
165 | _check_fiona("'read_file' function") | |
235 | engine = _check_engine(engine, "'read_file' function") | |
236 | ||
166 | 237 | filename = _expand_user(filename) |
167 | 238 | |
239 | from_bytes = False | |
168 | 240 | if _is_url(filename): |
169 | 241 | req = _urlopen(filename) |
170 | 242 | path_or_bytes = req.read() |
171 | reader = fiona.BytesCollection | |
243 | from_bytes = True | |
172 | 244 | elif pd.api.types.is_file_like(filename): |
173 | 245 | data = filename.read() |
174 | 246 | path_or_bytes = data.encode("utf-8") if isinstance(data, str) else data |
175 | reader = fiona.BytesCollection | |
247 | from_bytes = True | |
176 | 248 | else: |
249 | path_or_bytes = filename | |
250 | ||
251 | if engine == "fiona": | |
252 | return _read_file_fiona( | |
253 | path_or_bytes, from_bytes, bbox=bbox, mask=mask, rows=rows, **kwargs | |
254 | ) | |
255 | elif engine == "pyogrio": | |
256 | return _read_file_pyogrio( | |
257 | path_or_bytes, bbox=bbox, mask=mask, rows=rows, **kwargs | |
258 | ) | |
259 | else: | |
260 | raise ValueError(f"unknown engine '{engine}'") | |
261 | ||
262 | ||
263 | def _read_file_fiona( | |
264 | path_or_bytes, from_bytes, bbox=None, mask=None, rows=None, **kwargs | |
265 | ): | |
266 | if not from_bytes: | |
177 | 267 | # Opening a file via URL or file-like-object above automatically detects a |
178 | 268 | # zipped file. In order to match that behavior, attempt to add a zip scheme |
179 | 269 | # if missing. |
180 | if _is_zip(str(filename)): | |
181 | parsed = fiona.parse_path(str(filename)) | |
270 | if _is_zip(str(path_or_bytes)): | |
271 | parsed = fiona.parse_path(str(path_or_bytes)) | |
182 | 272 | if isinstance(parsed, fiona.path.ParsedPath): |
183 | 273 | # If fiona is able to parse the path, we can safely look at the scheme |
184 | 274 | # and update it to have a zip scheme if necessary. |
185 | 275 | schemes = (parsed.scheme or "").split("+") |
186 | 276 | if "zip" not in schemes: |
187 | 277 | parsed.scheme = "+".join(["zip"] + schemes) |
188 | filename = parsed.name | |
278 | path_or_bytes = parsed.name | |
189 | 279 | elif isinstance(parsed, fiona.path.UnparsedPath) and not str( |
190 | filename | |
280 | path_or_bytes | |
191 | 281 | ).startswith("/vsi"): |
192 | 282 | # If fiona is unable to parse the path, it might have a Windows drive |
193 | 283 | # scheme. Try adding zip:// to the front. If the path starts with "/vsi" |
194 | 284 | # it is a legacy GDAL path type, so let it pass unmodified. |
195 | filename = "zip://" + parsed.name | |
196 | path_or_bytes = filename | |
285 | path_or_bytes = "zip://" + parsed.name | |
286 | ||
287 | if from_bytes: | |
288 | reader = fiona.BytesCollection | |
289 | else: | |
197 | 290 | reader = fiona.open |
198 | 291 | |
199 | 292 | with fiona_env(): |
235 | 328 | f_filt = features |
236 | 329 | # get list of columns |
237 | 330 | columns = list(features.schema["properties"]) |
331 | datetime_fields = [ | |
332 | k for (k, v) in features.schema["properties"].items() if v == "datetime" | |
333 | ] | |
238 | 334 | if kwargs.get("ignore_geometry", False): |
239 | return pd.DataFrame( | |
335 | df = pd.DataFrame( | |
240 | 336 | [record["properties"] for record in f_filt], columns=columns |
241 | 337 | ) |
242 | ||
243 | return GeoDataFrame.from_features( | |
244 | f_filt, crs=crs, columns=columns + ["geometry"] | |
245 | ) | |
338 | else: | |
339 | df = GeoDataFrame.from_features( | |
340 | f_filt, crs=crs, columns=columns + ["geometry"] | |
341 | ) | |
342 | for k in datetime_fields: | |
343 | # fiona only supports up to ms precision, any microseconds are | |
344 | # floating point rounding error | |
345 | df[k] = pd.to_datetime(df[k]).dt.round(freq="ms") | |
346 | return df | |
347 | ||
348 | ||
349 | def _read_file_pyogrio(path_or_bytes, bbox=None, mask=None, rows=None, **kwargs): | |
350 | import pyogrio | |
351 | ||
352 | if rows is not None: | |
353 | if isinstance(rows, int): | |
354 | kwargs["max_features"] = rows | |
355 | elif isinstance(rows, slice): | |
356 | if rows.start is not None: | |
357 | kwargs["skip_features"] = rows.start | |
358 | if rows.stop is not None: | |
359 | kwargs["max_features"] = rows.stop - (rows.start or 0) | |
360 | if rows.step is not None: | |
361 | raise ValueError("slice with step is not supported") | |
362 | else: | |
363 | raise TypeError("'rows' must be an integer or a slice.") | |
364 | if bbox is not None: | |
365 | if isinstance(bbox, (GeoDataFrame, GeoSeries)): | |
366 | bbox = tuple(bbox.total_bounds) | |
367 | elif isinstance(bbox, BaseGeometry): | |
368 | bbox = bbox.bounds | |
369 | if len(bbox) != 4: | |
370 | raise ValueError("'bbox' should be a length-4 tuple.") | |
371 | if mask is not None: | |
372 | raise ValueError( | |
373 | "The 'mask' keyword is not supported with the 'pyogrio' engine. " | |
374 | "You can use 'bbox' instead." | |
375 | ) | |
376 | if kwargs.pop("ignore_geometry", False): | |
377 | kwargs["read_geometry"] = False | |
378 | ||
379 | # TODO: if bbox is not None, check its CRS vs the CRS of the file | |
380 | return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs) | |
246 | 381 | |
247 | 382 | |
248 | 383 | def read_file(*args, **kwargs): |
249 | import warnings | |
250 | ||
251 | 384 | warnings.warn( |
252 | 385 | "geopandas.io.file.read_file() is intended for internal " |
253 | 386 | "use only, and will be deprecated. Use geopandas.read_file() instead.", |
254 | DeprecationWarning, | |
387 | FutureWarning, | |
255 | 388 | stacklevel=2, |
256 | 389 | ) |
257 | 390 | |
259 | 392 | |
260 | 393 | |
261 | 394 | def to_file(*args, **kwargs): |
262 | import warnings | |
263 | ||
264 | 395 | warnings.warn( |
265 | 396 | "geopandas.io.file.to_file() is intended for internal " |
266 | 397 | "use only, and will be deprecated. Use GeoDataFrame.to_file() " |
267 | 398 | "or GeoSeries.to_file() instead.", |
268 | DeprecationWarning, | |
399 | FutureWarning, | |
269 | 400 | stacklevel=2, |
270 | 401 | ) |
271 | 402 | |
298 | 429 | index=None, |
299 | 430 | mode="w", |
300 | 431 | crs=None, |
432 | engine=None, | |
301 | 433 | **kwargs, |
302 | 434 | ): |
303 | 435 | """ |
311 | 443 | ---------- |
312 | 444 | df : GeoDataFrame to be written |
313 | 445 | filename : string |
314 | File path or file handle to write to. | |
446 | File path or file handle to write to. The path may specify a | |
447 | GDAL VSI scheme. | |
315 | 448 | driver : string, default None |
316 | 449 | The OGR format driver used to write the vector file. |
317 | 450 | If not specified, it attempts to infer it from the file extension. |
319 | 452 | schema : dict, default None |
320 | 453 | If specified, the schema dictionary is passed to Fiona to |
321 | 454 | better control how the file is written. If None, GeoPandas |
322 | will determine the schema based on each column's dtype | |
455 | will determine the schema based on each column's dtype. | |
456 | Not supported for the "pyogrio" engine. | |
323 | 457 | index : bool, default None |
324 | 458 | If True, write index into one or more columns (for MultiIndex). |
325 | 459 | Default None writes the index into one or more columns only if |
340 | 474 | The value can be anything accepted |
341 | 475 | by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`, |
342 | 476 | such as an authority string (eg "EPSG:4326") or a WKT string. |
343 | ||
344 | The *kwargs* are passed to fiona.open and can be used to write | |
345 | to multi-layer data, store data within archives (zip files), etc. | |
346 | The path may specify a fiona VSI scheme. | |
477 | engine : str, "fiona" or "pyogrio" | |
478 | The underlying library that is used to write the file. Currently, the | |
479 | supported options are "fiona" and "pyogrio". Defaults to "fiona" if | |
480 | installed, otherwise tries "pyogrio". | |
481 | **kwargs : | |
482 | Keyword args to be passed to the engine, and can be used to write | |
483 | to multi-layer data, store data within archives (zip files), etc. | |
484 | In case of the "fiona" engine, the keyword arguments are passed to | |
485 | fiona.open`. For more information on possible keywords, type: | |
486 | ``import fiona; help(fiona.open)``. In case of the "pyogrio" engine, | |
487 | the keyword arguments are passed to `pyogrio.write_dataframe`. | |
347 | 488 | |
348 | 489 | Notes |
349 | 490 | ----- |
351 | 492 | may fail. In this case, the proper encoding can be specified explicitly |
352 | 493 | by using the encoding keyword parameter, e.g. ``encoding='utf-8'``. |
353 | 494 | """ |
354 | _check_fiona("'to_file' method") | |
495 | engine = _check_engine(engine, "'to_file' method") | |
496 | ||
355 | 497 | filename = _expand_user(filename) |
356 | 498 | |
357 | 499 | if index is None: |
358 | 500 | # Determine if index attribute(s) should be saved to file |
359 | index = list(df.index.names) != [None] or type(df.index) not in ( | |
360 | pd.RangeIndex, | |
361 | pd.Int64Index, | |
362 | ) | |
501 | # (only if they are named or are non-integer) | |
502 | index = list(df.index.names) != [None] or not is_integer_dtype(df.index.dtype) | |
363 | 503 | if index: |
364 | 504 | df = df.reset_index(drop=False) |
365 | if schema is None: | |
366 | schema = infer_schema(df) | |
367 | if crs: | |
368 | crs = pyproj.CRS.from_user_input(crs) | |
369 | else: | |
370 | crs = df.crs | |
371 | 505 | |
372 | 506 | if driver is None: |
373 | 507 | driver = _detect_driver(filename) |
378 | 512 | "ESRI Shapefile.", |
379 | 513 | stacklevel=3, |
380 | 514 | ) |
515 | ||
516 | if engine == "fiona": | |
517 | _to_file_fiona(df, filename, driver, schema, crs, mode, **kwargs) | |
518 | elif engine == "pyogrio": | |
519 | _to_file_pyogrio(df, filename, driver, schema, crs, mode, **kwargs) | |
520 | else: | |
521 | raise ValueError(f"unknown engine '{engine}'") | |
522 | ||
523 | ||
524 | def _to_file_fiona(df, filename, driver, schema, crs, mode, **kwargs): | |
525 | ||
526 | if schema is None: | |
527 | schema = infer_schema(df) | |
528 | ||
529 | if crs: | |
530 | crs = pyproj.CRS.from_user_input(crs) | |
531 | else: | |
532 | crs = df.crs | |
381 | 533 | |
382 | 534 | with fiona_env(): |
383 | 535 | crs_wkt = None |
385 | 537 | gdal_version = fiona.env.get_gdal_release_name() |
386 | 538 | except AttributeError: |
387 | 539 | gdal_version = "2.0.0" # just assume it is not the latest |
388 | if LooseVersion(gdal_version) >= LooseVersion("3.0.0") and crs: | |
540 | if Version(gdal_version) >= Version("3.0.0") and crs: | |
389 | 541 | crs_wkt = crs.to_wkt() |
390 | 542 | elif crs: |
391 | 543 | crs_wkt = crs.to_wkt("WKT1_GDAL") |
393 | 545 | filename, mode=mode, driver=driver, crs_wkt=crs_wkt, schema=schema, **kwargs |
394 | 546 | ) as colxn: |
395 | 547 | colxn.writerecords(df.iterfeatures()) |
548 | ||
549 | ||
550 | def _to_file_pyogrio(df, filename, driver, schema, crs, mode, **kwargs): | |
551 | import pyogrio | |
552 | ||
553 | if schema is not None: | |
554 | raise ValueError( | |
555 | "The 'schema' argument is not supported with the 'pyogrio' engine." | |
556 | ) | |
557 | ||
558 | if mode != "w": | |
559 | raise ValueError( | |
560 | "Only mode='w' is supported for now with the 'pyogrio' engine." | |
561 | ) | |
562 | ||
563 | if crs is not None: | |
564 | raise ValueError("Passing 'crs' it not supported with the 'pyogrio' engine.") | |
565 | ||
566 | # for the fiona engine, this check is done in gdf.iterfeatures() | |
567 | if not df.columns.is_unique: | |
568 | raise ValueError("GeoDataFrame cannot contain duplicated column names.") | |
569 | ||
570 | pyogrio.write_dataframe(df, filename, driver=driver, **kwargs) | |
396 | 571 | |
397 | 572 | |
398 | 573 | def infer_schema(df): |
424 | 599 | ) |
425 | 600 | |
426 | 601 | if df.empty: |
427 | raise ValueError("Cannot write empty DataFrame to file.") | |
602 | warnings.warn( | |
603 | "You are attempting to write an empty DataFrame to file. " | |
604 | "For some drivers, this operation may fail.", | |
605 | UserWarning, | |
606 | stacklevel=3, | |
607 | ) | |
428 | 608 | |
429 | 609 | # Since https://github.com/Toblerity/Fiona/issues/446 resolution, |
430 | 610 | # Fiona allows a list of geometry types |
181 | 181 | warnings.warn( |
182 | 182 | "geopandas.io.sql.read_postgis() is intended for internal " |
183 | 183 | "use only, and will be deprecated. Use geopandas.read_postgis() instead.", |
184 | DeprecationWarning, | |
184 | FutureWarning, | |
185 | 185 | stacklevel=2, |
186 | 186 | ) |
187 | 187 | |
229 | 229 | |
230 | 230 | # Check for 3D-coordinates |
231 | 231 | if any(gdf.geometry.has_z): |
232 | target_geom_type = target_geom_type + "Z" | |
232 | target_geom_type += "Z" | |
233 | 233 | |
234 | 234 | return target_geom_type, has_curve |
235 | 235 | |
241 | 241 | |
242 | 242 | # Use geoalchemy2 default for srid |
243 | 243 | # Note: undefined srid in PostGIS is 0 |
244 | srid = -1 | |
244 | srid = None | |
245 | 245 | warning_msg = ( |
246 | 246 | "Could not parse CRS from the GeoDataFrame. " |
247 | + "Inserting data without defined CRS.", | |
247 | "Inserting data without defined CRS." | |
248 | 248 | ) |
249 | 249 | if gdf.crs is not None: |
250 | 250 | try: |
251 | srid = gdf.crs.to_epsg(min_confidence=25) | |
252 | if srid is None: | |
253 | srid = -1 | |
254 | warnings.warn(warning_msg, UserWarning, stacklevel=2) | |
251 | for confidence in (100, 70, 25): | |
252 | srid = gdf.crs.to_epsg(min_confidence=confidence) | |
253 | if srid is not None: | |
254 | break | |
255 | auth_srid = gdf.crs.to_authority( | |
256 | auth_name="ESRI", min_confidence=confidence | |
257 | ) | |
258 | if auth_srid is not None: | |
259 | srid = int(auth_srid[1]) | |
260 | break | |
255 | 261 | except Exception: |
256 | 262 | warnings.warn(warning_msg, UserWarning, stacklevel=2) |
263 | ||
264 | if srid is None: | |
265 | srid = -1 | |
266 | warnings.warn(warning_msg, UserWarning, stacklevel=2) | |
267 | ||
257 | 268 | return srid |
258 | 269 | |
259 | 270 | |
271 | 282 | |
272 | 283 | |
273 | 284 | def _convert_to_ewkb(gdf, geom_name, srid): |
274 | """Convert geometries to ewkb. """ | |
285 | """Convert geometries to ewkb.""" | |
275 | 286 | if compat.USE_PYGEOS: |
276 | 287 | from pygeos import set_srid, to_wkb |
277 | 288 | |
368 | 379 | try: |
369 | 380 | from geoalchemy2 import Geometry |
370 | 381 | except ImportError: |
371 | raise ImportError("'to_postgis()' requires geoalchemy2 package. ") | |
372 | ||
373 | if not compat.SHAPELY_GE_17: | |
374 | raise ImportError( | |
375 | "'to_postgis()' requires newer version of Shapely " | |
376 | "(>= '1.7.0').\nYou can update the library using " | |
377 | "'pip install shapely --upgrade' or using " | |
378 | "'conda update shapely' if using conda package manager." | |
379 | ) | |
382 | raise ImportError("'to_postgis()' requires geoalchemy2 package.") | |
380 | 383 | |
381 | 384 | gdf = gdf.copy() |
382 | 385 | geom_name = gdf.geometry.name |
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
Binary diff not shown
30 | 30 | |
31 | 31 | |
32 | 32 | def create_pickle_data(): |
33 | """ create the pickle data """ | |
33 | """create the pickle data""" | |
34 | 34 | |
35 | 35 | # custom geometry column name |
36 | 36 | gdf_the_geom = geopandas.GeoDataFrame( |
0 | 0 | from __future__ import absolute_import |
1 | 1 | |
2 | from distutils.version import LooseVersion | |
2 | from itertools import product | |
3 | import json | |
4 | from packaging.version import Version | |
3 | 5 | import os |
6 | import pathlib | |
4 | 7 | |
5 | 8 | import pytest |
6 | 9 | from pandas import DataFrame, read_parquet as pd_read_parquet |
7 | 10 | from pandas.testing import assert_frame_equal |
8 | 11 | import numpy as np |
9 | from shapely.geometry import box | |
12 | import pyproj | |
13 | from shapely.geometry import box, Point, MultiPolygon | |
14 | ||
10 | 15 | |
11 | 16 | import geopandas |
12 | 17 | from geopandas import GeoDataFrame, read_file, read_parquet, read_feather |
13 | 18 | from geopandas.array import to_wkb |
14 | 19 | from geopandas.datasets import get_path |
15 | 20 | from geopandas.io.arrow import ( |
21 | SUPPORTED_VERSIONS, | |
16 | 22 | _create_metadata, |
17 | 23 | _decode_metadata, |
18 | 24 | _encode_metadata, |
25 | _geopandas_to_arrow, | |
19 | 26 | _get_filesystem_path, |
27 | _remove_id_from_member_of_ensembles, | |
20 | 28 | _validate_dataframe, |
21 | 29 | _validate_metadata, |
22 | 30 | METADATA_VERSION, |
23 | 31 | ) |
24 | 32 | from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal |
33 | from geopandas.tests.util import mock | |
34 | ||
35 | ||
36 | DATA_PATH = pathlib.Path(os.path.dirname(__file__)) / "data" | |
25 | 37 | |
26 | 38 | |
27 | 39 | # Skip all tests in this module if pyarrow is not available |
28 | 40 | pyarrow = pytest.importorskip("pyarrow") |
29 | ||
30 | # TEMPORARY: hide warning from to_parquet | |
31 | pytestmark = pytest.mark.filterwarnings("ignore:.*initial implementation of Parquet.*") | |
32 | 41 | |
33 | 42 | |
34 | 43 | @pytest.fixture( |
37 | 46 | pytest.param( |
38 | 47 | "feather", |
39 | 48 | marks=pytest.mark.skipif( |
40 | pyarrow.__version__ < LooseVersion("0.17.0"), | |
49 | Version(pyarrow.__version__) < Version("0.17.0"), | |
41 | 50 | reason="needs pyarrow >= 0.17", |
42 | 51 | ), |
43 | 52 | ), |
56 | 65 | metadata = _create_metadata(df) |
57 | 66 | |
58 | 67 | assert isinstance(metadata, dict) |
59 | assert metadata["schema_version"] == METADATA_VERSION | |
68 | assert metadata["version"] == METADATA_VERSION | |
69 | assert metadata["primary_column"] == "geometry" | |
70 | assert "geometry" in metadata["columns"] | |
71 | crs_expected = df.crs.to_json_dict() | |
72 | _remove_id_from_member_of_ensembles(crs_expected) | |
73 | assert metadata["columns"]["geometry"]["crs"] == crs_expected | |
74 | assert metadata["columns"]["geometry"]["encoding"] == "WKB" | |
75 | assert metadata["columns"]["geometry"]["geometry_type"] == [ | |
76 | "MultiPolygon", | |
77 | "Polygon", | |
78 | ] | |
79 | ||
80 | assert np.array_equal( | |
81 | metadata["columns"]["geometry"]["bbox"], df.geometry.total_bounds | |
82 | ) | |
83 | ||
60 | 84 | assert metadata["creator"]["library"] == "geopandas" |
61 | 85 | assert metadata["creator"]["version"] == geopandas.__version__ |
62 | assert metadata["primary_column"] == "geometry" | |
63 | assert "geometry" in metadata["columns"] | |
64 | assert metadata["columns"]["geometry"]["crs"] == df.geometry.crs.to_wkt() | |
65 | assert metadata["columns"]["geometry"]["encoding"] == "WKB" | |
66 | ||
67 | assert np.array_equal( | |
68 | metadata["columns"]["geometry"]["bbox"], df.geometry.total_bounds | |
69 | ) | |
86 | ||
87 | ||
88 | def test_crs_metadata_datum_ensemble(): | |
89 | # compatibility for older PROJ versions using PROJJSON with datum ensembles | |
90 | # https://github.com/geopandas/geopandas/pull/2453 | |
91 | crs = pyproj.CRS("EPSG:4326") | |
92 | crs_json = crs.to_json_dict() | |
93 | check_ensemble = False | |
94 | if "datum_ensemble" in crs_json: | |
95 | # older version of PROJ don't yet have datum ensembles | |
96 | check_ensemble = True | |
97 | assert "id" in crs_json["datum_ensemble"]["members"][0] | |
98 | _remove_id_from_member_of_ensembles(crs_json) | |
99 | if check_ensemble: | |
100 | assert "id" not in crs_json["datum_ensemble"]["members"][0] | |
101 | # ensure roundtrip still results in an equivalent CRS | |
102 | assert pyproj.CRS(crs_json) == crs | |
103 | ||
104 | ||
105 | def test_write_metadata_invalid_spec_version(): | |
106 | gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="EPSG:4326") | |
107 | with pytest.raises(ValueError, match="version must be one of"): | |
108 | _create_metadata(gdf, version="invalid") | |
70 | 109 | |
71 | 110 | |
72 | 111 | def test_encode_metadata(): |
81 | 120 | |
82 | 121 | expected = {"a": "b"} |
83 | 122 | assert _decode_metadata(metadata_str) == expected |
123 | ||
124 | assert _decode_metadata(None) is None | |
84 | 125 | |
85 | 126 | |
86 | 127 | def test_validate_dataframe(): |
111 | 152 | { |
112 | 153 | "primary_column": "geometry", |
113 | 154 | "columns": {"geometry": {"crs": None, "encoding": "WKB"}}, |
155 | "schema_version": "0.1.0", | |
114 | 156 | } |
115 | 157 | ) |
116 | 158 | |
117 | 159 | _validate_metadata( |
118 | 160 | { |
119 | 161 | "primary_column": "geometry", |
120 | "columns": {"geometry": {"crs": "WKT goes here", "encoding": "WKB"}}, | |
162 | "columns": {"geometry": {"crs": None, "encoding": "WKB"}}, | |
163 | "version": "<version>", | |
164 | } | |
165 | ) | |
166 | ||
167 | _validate_metadata( | |
168 | { | |
169 | "primary_column": "geometry", | |
170 | "columns": { | |
171 | "geometry": { | |
172 | "crs": { | |
173 | # truncated PROJJSON for testing, as PROJJSON contents | |
174 | # not validated here | |
175 | "id": {"authority": "EPSG", "code": 4326}, | |
176 | }, | |
177 | "encoding": "WKB", | |
178 | } | |
179 | }, | |
180 | "version": "0.4.0", | |
121 | 181 | } |
122 | 182 | ) |
123 | 183 | |
125 | 185 | @pytest.mark.parametrize( |
126 | 186 | "metadata,error", |
127 | 187 | [ |
188 | (None, "Missing or malformed geo metadata in Parquet/Feather file"), | |
128 | 189 | ({}, "Missing or malformed geo metadata in Parquet/Feather file"), |
129 | ( | |
130 | {"primary_column": "foo"}, | |
131 | "'geo' metadata in Parquet/Feather file is missing required key:", | |
132 | ), | |
190 | # missing "version" key: | |
133 | 191 | ( |
134 | 192 | {"primary_column": "foo", "columns": None}, |
135 | 193 | "'geo' metadata in Parquet/Feather file is missing required key", |
136 | 194 | ), |
195 | # missing "columns" key: | |
137 | 196 | ( |
138 | {"primary_column": "foo", "columns": []}, | |
197 | {"primary_column": "foo", "version": "<version>"}, | |
198 | "'geo' metadata in Parquet/Feather file is missing required key:", | |
199 | ), | |
200 | # missing "primary_column" | |
201 | ( | |
202 | {"columns": [], "version": "<version>"}, | |
203 | "'geo' metadata in Parquet/Feather file is missing required key:", | |
204 | ), | |
205 | ( | |
206 | {"primary_column": "foo", "columns": [], "version": "<version>"}, | |
139 | 207 | "'columns' in 'geo' metadata must be a dict", |
140 | 208 | ), |
209 | # missing "encoding" for column | |
141 | 210 | ( |
142 | {"primary_column": "foo", "columns": {"foo": {}}}, | |
211 | {"primary_column": "foo", "columns": {"foo": {}}, "version": "<version>"}, | |
143 | 212 | ( |
144 | "'geo' metadata in Parquet/Feather file is missing required key 'crs' " | |
145 | "for column 'foo'" | |
213 | "'geo' metadata in Parquet/Feather file is missing required key " | |
214 | "'encoding' for column 'foo'" | |
146 | 215 | ), |
147 | 216 | ), |
148 | ( | |
149 | {"primary_column": "foo", "columns": {"foo": {"crs": None}}}, | |
150 | "'geo' metadata in Parquet/Feather file is missing required key", | |
151 | ), | |
152 | ( | |
153 | {"primary_column": "foo", "columns": {"foo": {"encoding": None}}}, | |
154 | "'geo' metadata in Parquet/Feather file is missing required key", | |
155 | ), | |
217 | # invalid column encoding | |
156 | 218 | ( |
157 | 219 | { |
158 | 220 | "primary_column": "foo", |
159 | 221 | "columns": {"foo": {"crs": None, "encoding": None}}, |
222 | "version": "<version>", | |
160 | 223 | }, |
161 | 224 | "Only WKB geometry encoding is supported", |
162 | 225 | ), |
164 | 227 | { |
165 | 228 | "primary_column": "foo", |
166 | 229 | "columns": {"foo": {"crs": None, "encoding": "BKW"}}, |
230 | "version": "<version>", | |
167 | 231 | }, |
168 | 232 | "Only WKB geometry encoding is supported", |
169 | 233 | ), |
172 | 236 | def test_validate_metadata_invalid(metadata, error): |
173 | 237 | with pytest.raises(ValueError, match=error): |
174 | 238 | _validate_metadata(metadata) |
239 | ||
240 | ||
241 | def test_to_parquet_fails_on_invalid_engine(tmpdir): | |
242 | df = GeoDataFrame(data=[[1, 2, 3]], columns=["a", "b", "a"], geometry=[Point(1, 1)]) | |
243 | ||
244 | with pytest.raises( | |
245 | ValueError, | |
246 | match=( | |
247 | "GeoPandas only supports using pyarrow as the engine for " | |
248 | "to_parquet: 'fastparquet' passed instead." | |
249 | ), | |
250 | ): | |
251 | df.to_parquet(tmpdir / "test.parquet", engine="fastparquet") | |
252 | ||
253 | ||
254 | @mock.patch("geopandas.io.arrow._to_parquet") | |
255 | def test_to_parquet_does_not_pass_engine_along(mock_to_parquet): | |
256 | df = GeoDataFrame(data=[[1, 2, 3]], columns=["a", "b", "a"], geometry=[Point(1, 1)]) | |
257 | df.to_parquet("", engine="pyarrow") | |
258 | # assert that engine keyword is not passed through to _to_parquet (and thus | |
259 | # parquet.write_table) | |
260 | mock_to_parquet.assert_called_with( | |
261 | df, "", compression="snappy", index=None, version=None | |
262 | ) | |
175 | 263 | |
176 | 264 | |
177 | 265 | # TEMPORARY: used to determine if pyarrow fails for roundtripping pandas data |
216 | 304 | |
217 | 305 | filename = os.path.join(str(tmpdir), "test.pq") |
218 | 306 | |
219 | # TEMP: Initial implementation should raise a UserWarning | |
220 | with pytest.warns(UserWarning, match="initial implementation"): | |
221 | writer(df, filename) | |
307 | writer(df, filename) | |
222 | 308 | |
223 | 309 | assert os.path.exists(filename) |
224 | 310 | |
270 | 356 | |
271 | 357 | |
272 | 358 | @pytest.mark.skipif( |
273 | pyarrow.__version__ < LooseVersion("0.17.0"), | |
359 | Version(pyarrow.__version__) < Version("0.17.0"), | |
274 | 360 | reason="Feather only supported for pyarrow >= 0.17", |
275 | 361 | ) |
276 | 362 | @pytest.mark.parametrize("compression", ["uncompressed", "lz4", "zstd"]) |
488 | 574 | |
489 | 575 | |
490 | 576 | @pytest.mark.skipif( |
491 | pyarrow.__version__ >= LooseVersion("0.17.0"), | |
577 | Version(pyarrow.__version__) >= Version("0.17.0"), | |
492 | 578 | reason="Feather only supported for pyarrow >= 0.17", |
493 | 579 | ) |
494 | 580 | def test_feather_arrow_version(tmpdir): |
527 | 613 | result = read_parquet("memory://data.parquet", filesystem=memfs) |
528 | 614 | assert_geodataframe_equal(result, df) |
529 | 615 | |
616 | # reset fsspec registry | |
617 | fsspec.register_implementation( | |
618 | "memory", fsspec.implementations.memory.MemoryFileSystem, clobber=True | |
619 | ) | |
620 | ||
530 | 621 | |
531 | 622 | def test_non_fsspec_url_with_storage_options_raises(): |
532 | 623 | with pytest.raises(ValueError, match="storage_options"): |
535 | 626 | |
536 | 627 | |
537 | 628 | @pytest.mark.skipif( |
538 | pyarrow.__version__ < LooseVersion("5.0.0"), | |
629 | Version(pyarrow.__version__) < Version("5.0.0"), | |
539 | 630 | reason="pyarrow.fs requires pyarrow>=5.0.0", |
540 | 631 | ) |
541 | 632 | def test_prefers_pyarrow_fs(): |
559 | 650 | f_df = geopandas.read_feather(test_file) |
560 | 651 | assert_geodataframe_equal(gdf, f_df, check_crs=True) |
561 | 652 | os.remove(os.path.expanduser(test_file)) |
653 | ||
654 | ||
655 | @pytest.mark.parametrize("format", ["feather", "parquet"]) | |
656 | def test_write_read_default_crs(tmpdir, format): | |
657 | if format == "feather": | |
658 | from pyarrow.feather import write_feather as write | |
659 | else: | |
660 | from pyarrow.parquet import write_table as write | |
661 | ||
662 | filename = os.path.join(str(tmpdir), f"test.{format}") | |
663 | gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)]) | |
664 | table = _geopandas_to_arrow(gdf) | |
665 | ||
666 | # update the geo metadata to strip 'crs' entry | |
667 | metadata = table.schema.metadata | |
668 | geo_metadata = _decode_metadata(metadata[b"geo"]) | |
669 | del geo_metadata["columns"]["geometry"]["crs"] | |
670 | metadata.update({b"geo": _encode_metadata(geo_metadata)}) | |
671 | table = table.replace_schema_metadata(metadata) | |
672 | ||
673 | write(table, filename) | |
674 | ||
675 | read = getattr(geopandas, f"read_{format}") | |
676 | df = read(filename) | |
677 | assert df.crs.equals(pyproj.CRS("OGC:CRS84")) | |
678 | ||
679 | ||
680 | @pytest.mark.parametrize( | |
681 | "format,version", product(["feather", "parquet"], [None] + SUPPORTED_VERSIONS) | |
682 | ) | |
683 | def test_write_spec_version(tmpdir, format, version): | |
684 | if format == "feather": | |
685 | from pyarrow.feather import read_table | |
686 | ||
687 | else: | |
688 | from pyarrow.parquet import read_table | |
689 | ||
690 | filename = os.path.join(str(tmpdir), f"test.{format}") | |
691 | gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="EPSG:4326") | |
692 | write = getattr(gdf, f"to_{format}") | |
693 | write(filename, version=version) | |
694 | ||
695 | # ensure that we can roundtrip data regardless of version | |
696 | read = getattr(geopandas, f"read_{format}") | |
697 | df = read(filename) | |
698 | assert_geodataframe_equal(df, gdf) | |
699 | ||
700 | table = read_table(filename) | |
701 | metadata = json.loads(table.schema.metadata[b"geo"]) | |
702 | assert metadata["version"] == version or METADATA_VERSION | |
703 | ||
704 | # verify that CRS is correctly handled between versions | |
705 | if version == "0.1.0": | |
706 | assert metadata["columns"]["geometry"]["crs"] == gdf.crs.to_wkt() | |
707 | ||
708 | else: | |
709 | crs_expected = gdf.crs.to_json_dict() | |
710 | _remove_id_from_member_of_ensembles(crs_expected) | |
711 | assert metadata["columns"]["geometry"]["crs"] == crs_expected | |
712 | ||
713 | ||
714 | @pytest.mark.parametrize("version", ["0.1.0", "0.4.0"]) | |
715 | def test_read_versioned_file(version): | |
716 | """ | |
717 | Verify that files for different metadata spec versions can be read | |
718 | created for each supported version: | |
719 | ||
720 | # small dummy test dataset (not naturalearth_lowres, as this can change over time) | |
721 | from shapely.geometry import box, MultiPolygon | |
722 | df = geopandas.GeoDataFrame( | |
723 | {"col_str": ["a", "b"], "col_int": [1, 2], "col_float": [0.1, 0.2]}, | |
724 | geometry=[MultiPolygon([box(0, 0, 1, 1), box(2, 2, 3, 3)]), box(4, 4, 5,5)], | |
725 | crs="EPSG:4326", | |
726 | ) | |
727 | df.to_feather(DATA_PATH / 'arrow' / f'test_data_v{METADATA_VERSION}.feather') # noqa: E501 | |
728 | df.to_parquet(DATA_PATH / 'arrow' / f'test_data_v{METADATA_VERSION}.parquet') # noqa: E501 | |
729 | """ | |
730 | check_crs = Version(pyproj.__version__) >= Version("3.0.0") | |
731 | ||
732 | expected = geopandas.GeoDataFrame( | |
733 | {"col_str": ["a", "b"], "col_int": [1, 2], "col_float": [0.1, 0.2]}, | |
734 | geometry=[MultiPolygon([box(0, 0, 1, 1), box(2, 2, 3, 3)]), box(4, 4, 5, 5)], | |
735 | crs="EPSG:4326", | |
736 | ) | |
737 | ||
738 | df = geopandas.read_feather(DATA_PATH / "arrow" / f"test_data_v{version}.feather") | |
739 | assert_geodataframe_equal(df, expected, check_crs=check_crs) | |
740 | ||
741 | df = geopandas.read_parquet(DATA_PATH / "arrow" / f"test_data_v{version}.parquet") | |
742 | assert_geodataframe_equal(df, expected, check_crs=check_crs) | |
743 | ||
744 | ||
745 | def test_read_gdal_files(): | |
746 | """ | |
747 | Verify that files written by GDAL can be read by geopandas. | |
748 | Since it is currently not yet straightforward to install GDAL with | |
749 | Parquet/Arrow enabled in our conda setup, we are testing with some | |
750 | generated files included in the repo (using GDAL 3.5.0): | |
751 | ||
752 | # small dummy test dataset (not naturalearth_lowres, as this can change over time) | |
753 | from shapely.geometry import box, MultiPolygon | |
754 | df = geopandas.GeoDataFrame( | |
755 | {"col_str": ["a", "b"], "col_int": [1, 2], "col_float": [0.1, 0.2]}, | |
756 | geometry=[MultiPolygon([box(0, 0, 1, 1), box(2, 2, 3, 3)]), box(4, 4, 5,5)], | |
757 | crs="EPSG:4326", | |
758 | ) | |
759 | df.to_file("test_data.gpkg", GEOMETRY_NAME="geometry") | |
760 | and then the gpkg file is converted to Parquet/Arrow with: | |
761 | $ ogr2ogr -f Parquet -lco FID= test_data_gdal350.parquet test_data.gpkg | |
762 | $ ogr2ogr -f Arrow -lco FID= -lco GEOMETRY_ENCODING=WKB test_data_gdal350.arrow test_data.gpkg # noqa: E501 | |
763 | """ | |
764 | check_crs = Version(pyproj.__version__) >= Version("3.0.0") | |
765 | ||
766 | expected = geopandas.GeoDataFrame( | |
767 | {"col_str": ["a", "b"], "col_int": [1, 2], "col_float": [0.1, 0.2]}, | |
768 | geometry=[MultiPolygon([box(0, 0, 1, 1), box(2, 2, 3, 3)]), box(4, 4, 5, 5)], | |
769 | crs="EPSG:4326", | |
770 | ) | |
771 | ||
772 | df = geopandas.read_parquet(DATA_PATH / "arrow" / "test_data_gdal350.parquet") | |
773 | assert_geodataframe_equal(df, expected, check_crs=check_crs) | |
774 | ||
775 | df = geopandas.read_feather(DATA_PATH / "arrow" / "test_data_gdal350.arrow") | |
776 | assert_geodataframe_equal(df, expected, check_crs=check_crs) | |
777 | ||
778 | ||
779 | def test_parquet_read_partitioned_dataset(tmpdir): | |
780 | # we don't yet explicitly support this (in writing), but for Parquet it | |
781 | # works for reading (by relying on pyarrow.read_table) | |
782 | df = read_file(get_path("naturalearth_lowres")) | |
783 | ||
784 | # manually create partitioned dataset | |
785 | basedir = tmpdir / "partitioned_dataset" | |
786 | basedir.mkdir() | |
787 | df[:100].to_parquet(basedir / "data1.parquet") | |
788 | df[100:].to_parquet(basedir / "data2.parquet") | |
789 | ||
790 | result = read_parquet(basedir) | |
791 | assert_geodataframe_equal(result, df) | |
792 | ||
793 | ||
794 | def test_parquet_read_partitioned_dataset_fsspec(tmpdir): | |
795 | fsspec = pytest.importorskip("fsspec") | |
796 | ||
797 | df = read_file(get_path("naturalearth_lowres")) | |
798 | ||
799 | # manually create partitioned dataset | |
800 | memfs = fsspec.filesystem("memory") | |
801 | memfs.mkdir("partitioned_dataset") | |
802 | with memfs.open("partitioned_dataset/data1.parquet", "wb") as f: | |
803 | df[:100].to_parquet(f) | |
804 | with memfs.open("partitioned_dataset/data2.parquet", "wb") as f: | |
805 | df[100:].to_parquet(f) | |
806 | ||
807 | result = read_parquet("memory://partitioned_dataset") | |
808 | assert_geodataframe_equal(result, df) |
0 | 0 | from collections import OrderedDict |
1 | 1 | import datetime |
2 | from packaging.version import Version | |
2 | 3 | import io |
3 | 4 | import os |
4 | 5 | import pathlib |
7 | 8 | import numpy as np |
8 | 9 | import pandas as pd |
9 | 10 | |
10 | import fiona | |
11 | import pytz | |
12 | from pandas.testing import assert_series_equal | |
11 | 13 | from shapely.geometry import Point, Polygon, box |
12 | 14 | |
13 | 15 | import geopandas |
14 | 16 | from geopandas import GeoDataFrame, read_file |
15 | from geopandas.io.file import fiona_env, _detect_driver, _EXTENSION_TO_DRIVER | |
17 | from geopandas.io.file import _detect_driver, _EXTENSION_TO_DRIVER | |
16 | 18 | |
17 | 19 | from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal |
18 | 20 | from geopandas.tests.util import PACKAGE_DIR, validate_boro_df |
20 | 22 | import pytest |
21 | 23 | |
22 | 24 | |
25 | try: | |
26 | import pyogrio | |
27 | except ImportError: | |
28 | pyogrio = False | |
29 | ||
30 | ||
31 | try: | |
32 | import fiona | |
33 | ||
34 | FIONA_GE_1814 = Version(fiona.__version__) >= Version( | |
35 | "1.8.14" | |
36 | ) # datetime roundtrip | |
37 | except ImportError: | |
38 | fiona = False | |
39 | FIONA_GE_1814 = False | |
40 | ||
41 | ||
42 | PYOGRIO_MARK = pytest.mark.skipif(not pyogrio, reason="pyogrio not installed") | |
43 | FIONA_MARK = pytest.mark.skipif(not fiona, reason="fiona not installed") | |
44 | ||
45 | ||
23 | 46 | _CRS = "epsg:4326" |
24 | 47 | |
25 | 48 | |
49 | @pytest.fixture( | |
50 | params=[ | |
51 | pytest.param("fiona", marks=FIONA_MARK), | |
52 | pytest.param("pyogrio", marks=PYOGRIO_MARK), | |
53 | ] | |
54 | ) | |
55 | def engine(request): | |
56 | return request.param | |
57 | ||
58 | ||
59 | def skip_pyogrio_not_supported(engine): | |
60 | if engine == "pyogrio": | |
61 | pytest.skip("not supported for the pyogrio engine") | |
62 | ||
63 | ||
26 | 64 | @pytest.fixture |
27 | def df_nybb(): | |
65 | def df_nybb(engine): | |
28 | 66 | nybb_path = geopandas.datasets.get_path("nybb") |
29 | df = read_file(nybb_path) | |
67 | df = read_file(nybb_path, engine=engine) | |
30 | 68 | return df |
31 | 69 | |
32 | 70 | |
71 | 109 | ] |
72 | 110 | |
73 | 111 | |
74 | def assert_correct_driver(file_path, ext): | |
112 | def assert_correct_driver(file_path, ext, engine): | |
75 | 113 | # check the expected driver |
76 | 114 | expected_driver = "ESRI Shapefile" if ext == "" else _EXTENSION_TO_DRIVER[ext] |
77 | with fiona.open(str(file_path)) as fds: | |
78 | assert fds.driver == expected_driver | |
115 | ||
116 | if engine == "fiona": | |
117 | with fiona.open(str(file_path)) as fds: | |
118 | assert fds.driver == expected_driver | |
119 | else: | |
120 | # TODO pyogrio doesn't yet provide a way to check the driver of a file | |
121 | return | |
79 | 122 | |
80 | 123 | |
81 | 124 | @pytest.mark.parametrize("driver,ext", driver_ext_pairs) |
82 | def test_to_file(tmpdir, df_nybb, df_null, driver, ext): | |
125 | def test_to_file(tmpdir, df_nybb, df_null, driver, ext, engine): | |
83 | 126 | """Test to_file and from_file""" |
84 | 127 | tempfilename = os.path.join(str(tmpdir), "boros." + ext) |
85 | df_nybb.to_file(tempfilename, driver=driver) | |
128 | df_nybb.to_file(tempfilename, driver=driver, engine=engine) | |
86 | 129 | # Read layer back in |
87 | df = GeoDataFrame.from_file(tempfilename) | |
130 | df = GeoDataFrame.from_file(tempfilename, engine=engine) | |
88 | 131 | assert "geometry" in df |
89 | 132 | assert len(df) == 5 |
90 | 133 | assert np.alltrue(df["BoroName"].values == df_nybb["BoroName"]) |
91 | 134 | |
92 | 135 | # Write layer with null geometry out to file |
93 | 136 | tempfilename = os.path.join(str(tmpdir), "null_geom" + ext) |
94 | df_null.to_file(tempfilename, driver=driver) | |
137 | df_null.to_file(tempfilename, driver=driver, engine=engine) | |
95 | 138 | # Read layer back in |
96 | df = GeoDataFrame.from_file(tempfilename) | |
139 | df = GeoDataFrame.from_file(tempfilename, engine=engine) | |
97 | 140 | assert "geometry" in df |
98 | 141 | assert len(df) == 2 |
99 | 142 | assert np.alltrue(df["Name"].values == df_null["Name"]) |
100 | 143 | # check the expected driver |
101 | assert_correct_driver(tempfilename, ext) | |
144 | assert_correct_driver(tempfilename, ext, engine) | |
102 | 145 | |
103 | 146 | |
104 | 147 | @pytest.mark.parametrize("driver,ext", driver_ext_pairs) |
105 | def test_to_file_pathlib(tmpdir, df_nybb, df_null, driver, ext): | |
148 | def test_to_file_pathlib(tmpdir, df_nybb, driver, ext, engine): | |
106 | 149 | """Test to_file and from_file""" |
107 | 150 | temppath = pathlib.Path(os.path.join(str(tmpdir), "boros." + ext)) |
108 | df_nybb.to_file(temppath, driver=driver) | |
151 | df_nybb.to_file(temppath, driver=driver, engine=engine) | |
109 | 152 | # Read layer back in |
110 | df = GeoDataFrame.from_file(temppath) | |
153 | df = GeoDataFrame.from_file(temppath, engine=engine) | |
111 | 154 | assert "geometry" in df |
112 | 155 | assert len(df) == 5 |
113 | 156 | assert np.alltrue(df["BoroName"].values == df_nybb["BoroName"]) |
114 | 157 | # check the expected driver |
115 | assert_correct_driver(temppath, ext) | |
158 | assert_correct_driver(temppath, ext, engine) | |
116 | 159 | |
117 | 160 | |
118 | 161 | @pytest.mark.parametrize("driver,ext", driver_ext_pairs) |
119 | def test_to_file_bool(tmpdir, driver, ext): | |
162 | def test_to_file_bool(tmpdir, driver, ext, engine): | |
120 | 163 | """Test error raise when writing with a boolean column (GH #437).""" |
121 | 164 | tempfilename = os.path.join(str(tmpdir), "temp.{0}".format(ext)) |
122 | 165 | df = GeoDataFrame( |
123 | 166 | { |
124 | "a": [1, 2, 3], | |
125 | "b": [True, False, True], | |
167 | "col": [True, False, True], | |
126 | 168 | "geometry": [Point(0, 0), Point(1, 1), Point(2, 2)], |
127 | 169 | }, |
128 | 170 | crs=4326, |
129 | 171 | ) |
130 | 172 | |
131 | df.to_file(tempfilename, driver=driver) | |
132 | result = read_file(tempfilename) | |
173 | df.to_file(tempfilename, driver=driver, engine=engine) | |
174 | result = read_file(tempfilename, engine=engine) | |
133 | 175 | if ext in (".shp", ""): |
134 | 176 | # Shapefile does not support boolean, so is read back as int |
135 | df["b"] = df["b"].astype("int64") | |
177 | if engine == "fiona": | |
178 | df["col"] = df["col"].astype("int64") | |
179 | else: | |
180 | df["col"] = df["col"].astype("int32") | |
136 | 181 | assert_geodataframe_equal(result, df) |
137 | 182 | # check the expected driver |
138 | assert_correct_driver(tempfilename, ext) | |
139 | ||
140 | ||
141 | def test_to_file_datetime(tmpdir): | |
183 | assert_correct_driver(tempfilename, ext, engine) | |
184 | ||
185 | ||
186 | TEST_DATE = datetime.datetime(2021, 11, 21, 1, 7, 43, 17500) | |
187 | eastern = pytz.timezone("US/Eastern") | |
188 | ||
189 | datetime_type_tests = (TEST_DATE, eastern.localize(TEST_DATE)) | |
190 | ||
191 | ||
192 | @pytest.mark.parametrize( | |
193 | "time", datetime_type_tests, ids=("naive_datetime", "datetime_with_timezone") | |
194 | ) | |
195 | @pytest.mark.parametrize("driver,ext", driver_ext_pairs) | |
196 | def test_to_file_datetime(tmpdir, driver, ext, time, engine): | |
142 | 197 | """Test writing a data file with the datetime column type""" |
143 | tempfilename = os.path.join(str(tmpdir), "test_datetime.gpkg") | |
198 | if engine == "pyogrio" and time.tzinfo is not None: | |
199 | # TODO | |
200 | pytest.skip("pyogrio doesn't yet support timezones") | |
201 | if ext in (".shp", ""): | |
202 | pytest.skip(f"Driver corresponding to ext {ext} doesn't support dt fields") | |
203 | if time.tzinfo is not None and FIONA_GE_1814 is False: | |
204 | # https://github.com/Toblerity/Fiona/pull/915 | |
205 | pytest.skip("Fiona >= 1.8.14 needed for timezone support") | |
206 | ||
207 | tempfilename = os.path.join(str(tmpdir), f"test_datetime{ext}") | |
144 | 208 | point = Point(0, 0) |
145 | now = datetime.datetime.now() | |
146 | df = GeoDataFrame({"a": [1, 2], "b": [now, now]}, geometry=[point, point], crs=4326) | |
147 | df.to_file(tempfilename, driver="GPKG") | |
148 | df_read = read_file(tempfilename) | |
149 | assert_geoseries_equal(df.geometry, df_read.geometry) | |
209 | ||
210 | df = GeoDataFrame( | |
211 | {"a": [1.0, 2.0], "b": [time, time]}, geometry=[point, point], crs=4326 | |
212 | ) | |
213 | if FIONA_GE_1814: | |
214 | fiona_precision_limit = "ms" | |
215 | else: | |
216 | fiona_precision_limit = "s" | |
217 | df["b"] = df["b"].dt.round(freq=fiona_precision_limit) | |
218 | ||
219 | df.to_file(tempfilename, driver=driver, engine=engine) | |
220 | df_read = read_file(tempfilename, engine=engine) | |
221 | ||
222 | assert_geodataframe_equal(df.drop(columns=["b"]), df_read.drop(columns=["b"])) | |
223 | if df["b"].dt.tz is not None: | |
224 | # US/Eastern becomes pytz.FixedOffset(-300) when read from file | |
225 | # so compare fairly in terms of UTC | |
226 | assert_series_equal( | |
227 | df["b"].dt.tz_convert(pytz.utc), df_read["b"].dt.tz_convert(pytz.utc) | |
228 | ) | |
229 | else: | |
230 | assert_series_equal(df["b"], df_read["b"]) | |
150 | 231 | |
151 | 232 | |
152 | 233 | @pytest.mark.parametrize("driver,ext", driver_ext_pairs) |
153 | def test_to_file_with_point_z(tmpdir, ext, driver): | |
234 | def test_to_file_with_point_z(tmpdir, ext, driver, engine): | |
154 | 235 | """Test that 3D geometries are retained in writes (GH #612).""" |
155 | 236 | |
156 | 237 | tempfilename = os.path.join(str(tmpdir), "test_3Dpoint" + ext) |
157 | 238 | point3d = Point(0, 0, 500) |
158 | 239 | point2d = Point(1, 1) |
159 | 240 | df = GeoDataFrame({"a": [1, 2]}, geometry=[point3d, point2d], crs=_CRS) |
160 | df.to_file(tempfilename, driver=driver) | |
161 | df_read = GeoDataFrame.from_file(tempfilename) | |
241 | df.to_file(tempfilename, driver=driver, engine=engine) | |
242 | df_read = GeoDataFrame.from_file(tempfilename, engine=engine) | |
162 | 243 | assert_geoseries_equal(df.geometry, df_read.geometry) |
163 | 244 | # check the expected driver |
164 | assert_correct_driver(tempfilename, ext) | |
245 | assert_correct_driver(tempfilename, ext, engine) | |
165 | 246 | |
166 | 247 | |
167 | 248 | @pytest.mark.parametrize("driver,ext", driver_ext_pairs) |
168 | def test_to_file_with_poly_z(tmpdir, ext, driver): | |
249 | def test_to_file_with_poly_z(tmpdir, ext, driver, engine): | |
169 | 250 | """Test that 3D geometries are retained in writes (GH #612).""" |
170 | 251 | |
171 | 252 | tempfilename = os.path.join(str(tmpdir), "test_3Dpoly" + ext) |
172 | 253 | poly3d = Polygon([[0, 0, 5], [0, 1, 5], [1, 1, 5], [1, 0, 5]]) |
173 | 254 | poly2d = Polygon([[0, 0], [0, 1], [1, 1], [1, 0]]) |
174 | 255 | df = GeoDataFrame({"a": [1, 2]}, geometry=[poly3d, poly2d], crs=_CRS) |
175 | df.to_file(tempfilename, driver=driver) | |
176 | df_read = GeoDataFrame.from_file(tempfilename) | |
256 | df.to_file(tempfilename, driver=driver, engine=engine) | |
257 | df_read = GeoDataFrame.from_file(tempfilename, engine=engine) | |
177 | 258 | assert_geoseries_equal(df.geometry, df_read.geometry) |
178 | 259 | # check the expected driver |
179 | assert_correct_driver(tempfilename, ext) | |
180 | ||
181 | ||
182 | def test_to_file_types(tmpdir, df_points): | |
260 | assert_correct_driver(tempfilename, ext, engine) | |
261 | ||
262 | ||
263 | def test_to_file_types(tmpdir, df_points, engine): | |
183 | 264 | """Test various integer type columns (GH#93)""" |
184 | 265 | tempfilename = os.path.join(str(tmpdir), "int.shp") |
185 | 266 | int_types = [ |
199 | 280 | for i, dtype in enumerate(int_types) |
200 | 281 | ) |
201 | 282 | df = GeoDataFrame(data, geometry=geometry) |
202 | df.to_file(tempfilename) | |
203 | ||
204 | ||
205 | def test_to_file_int64(tmpdir, df_points): | |
283 | df.to_file(tempfilename, engine=engine) | |
284 | ||
285 | ||
286 | def test_to_file_int64(tmpdir, df_points, engine): | |
287 | skip_pyogrio_not_supported(engine) # TODO | |
206 | 288 | tempfilename = os.path.join(str(tmpdir), "int64.shp") |
207 | 289 | geometry = df_points.geometry |
208 | 290 | df = GeoDataFrame(geometry=geometry) |
209 | 291 | df["data"] = pd.array([1, np.nan] * 5, dtype=pd.Int64Dtype()) |
210 | df.to_file(tempfilename) | |
211 | df_read = GeoDataFrame.from_file(tempfilename) | |
292 | df.to_file(tempfilename, engine=engine) | |
293 | df_read = GeoDataFrame.from_file(tempfilename, engine=engine) | |
212 | 294 | assert_geodataframe_equal(df_read, df, check_dtype=False, check_like=True) |
213 | 295 | |
214 | 296 | |
215 | def test_to_file_empty(tmpdir): | |
216 | input_empty_df = GeoDataFrame() | |
297 | def test_to_file_empty(tmpdir, engine): | |
298 | input_empty_df = GeoDataFrame(columns=["geometry"]) | |
217 | 299 | tempfilename = os.path.join(str(tmpdir), "test.shp") |
218 | with pytest.raises(ValueError, match="Cannot write empty DataFrame to file."): | |
219 | input_empty_df.to_file(tempfilename) | |
300 | with pytest.warns(UserWarning): | |
301 | input_empty_df.to_file(tempfilename, engine=engine) | |
220 | 302 | |
221 | 303 | |
222 | 304 | def test_to_file_privacy(tmpdir, df_nybb): |
223 | 305 | tempfilename = os.path.join(str(tmpdir), "test.shp") |
224 | with pytest.warns(DeprecationWarning): | |
306 | with pytest.warns(FutureWarning): | |
225 | 307 | geopandas.io.file.to_file(df_nybb, tempfilename) |
226 | 308 | |
227 | 309 | |
228 | def test_to_file_schema(tmpdir, df_nybb): | |
310 | def test_to_file_schema(tmpdir, df_nybb, engine): | |
229 | 311 | """ |
230 | 312 | Ensure that the file is written according to the schema |
231 | 313 | if it is specified |
242 | 324 | ) |
243 | 325 | schema = {"geometry": "Polygon", "properties": properties} |
244 | 326 | |
245 | # Take the first 2 features to speed things up a bit | |
246 | df_nybb.iloc[:2].to_file(tempfilename, schema=schema) | |
247 | ||
248 | with fiona.open(tempfilename) as f: | |
249 | result_schema = f.schema | |
250 | ||
251 | assert result_schema == schema | |
252 | ||
253 | ||
254 | def test_to_file_column_len(tmpdir, df_points): | |
327 | if engine == "pyogrio": | |
328 | with pytest.raises(ValueError): | |
329 | df_nybb.iloc[:2].to_file(tempfilename, schema=schema, engine=engine) | |
330 | else: | |
331 | # Take the first 2 features to speed things up a bit | |
332 | df_nybb.iloc[:2].to_file(tempfilename, schema=schema, engine=engine) | |
333 | ||
334 | import fiona | |
335 | ||
336 | with fiona.open(tempfilename) as f: | |
337 | result_schema = f.schema | |
338 | ||
339 | assert result_schema == schema | |
340 | ||
341 | ||
342 | def test_to_file_crs(tmpdir, engine): | |
343 | """ | |
344 | Ensure that the file is written according to the crs | |
345 | if it is specified | |
346 | """ | |
347 | df = read_file(geopandas.datasets.get_path("nybb"), engine=engine) | |
348 | tempfilename = os.path.join(str(tmpdir), "crs.shp") | |
349 | ||
350 | # save correct CRS | |
351 | df.to_file(tempfilename, engine=engine) | |
352 | result = GeoDataFrame.from_file(tempfilename, engine=engine) | |
353 | assert result.crs == df.crs | |
354 | ||
355 | if engine == "pyogrio": | |
356 | with pytest.raises(ValueError, match="Passing 'crs' it not supported"): | |
357 | df.to_file(tempfilename, crs=3857, engine=engine) | |
358 | return | |
359 | ||
360 | # overwrite CRS | |
361 | df.to_file(tempfilename, crs=3857, engine=engine) | |
362 | result = GeoDataFrame.from_file(tempfilename, engine=engine) | |
363 | assert result.crs == "epsg:3857" | |
364 | ||
365 | # specify CRS for gdf without one | |
366 | df2 = df.copy() | |
367 | df2.crs = None | |
368 | df2.to_file(tempfilename, crs=2263, engine=engine) | |
369 | df = GeoDataFrame.from_file(tempfilename, engine=engine) | |
370 | assert df.crs == "epsg:2263" | |
371 | ||
372 | ||
373 | def test_to_file_column_len(tmpdir, df_points, engine): | |
255 | 374 | """ |
256 | 375 | Ensure that a warning about truncation is given when a geodataframe with |
257 | 376 | column names longer than 10 characters is saved to shapefile |
264 | 383 | with pytest.warns( |
265 | 384 | UserWarning, match="Column names longer than 10 characters will be truncated" |
266 | 385 | ): |
267 | df.to_file(tempfilename, driver="ESRI Shapefile") | |
386 | df.to_file(tempfilename, driver="ESRI Shapefile", engine=engine) | |
387 | ||
388 | ||
389 | def test_to_file_with_duplicate_columns(tmpdir, engine): | |
390 | df = GeoDataFrame(data=[[1, 2, 3]], columns=["a", "b", "a"], geometry=[Point(1, 1)]) | |
391 | tempfilename = os.path.join(str(tmpdir), "duplicate.shp") | |
392 | with pytest.raises( | |
393 | ValueError, match="GeoDataFrame cannot contain duplicated column names." | |
394 | ): | |
395 | df.to_file(tempfilename, engine=engine) | |
268 | 396 | |
269 | 397 | |
270 | 398 | @pytest.mark.parametrize("driver,ext", driver_ext_pairs) |
271 | def test_append_file(tmpdir, df_nybb, df_null, driver, ext): | |
399 | def test_append_file(tmpdir, df_nybb, df_null, driver, ext, engine): | |
272 | 400 | """Test to_file with append mode and from_file""" |
401 | skip_pyogrio_not_supported(engine) | |
273 | 402 | from fiona import supported_drivers |
274 | 403 | |
275 | 404 | tempfilename = os.path.join(str(tmpdir), "boros" + ext) |
277 | 406 | if "a" not in supported_drivers[driver]: |
278 | 407 | return None |
279 | 408 | |
280 | df_nybb.to_file(tempfilename, driver=driver) | |
281 | df_nybb.to_file(tempfilename, mode="a", driver=driver) | |
409 | df_nybb.to_file(tempfilename, driver=driver, engine=engine) | |
410 | df_nybb.to_file(tempfilename, mode="a", driver=driver, engine=engine) | |
282 | 411 | # Read layer back in |
283 | df = GeoDataFrame.from_file(tempfilename) | |
412 | df = GeoDataFrame.from_file(tempfilename, engine=engine) | |
284 | 413 | assert "geometry" in df |
285 | 414 | assert len(df) == (5 * 2) |
286 | 415 | expected = pd.concat([df_nybb] * 2, ignore_index=True) |
288 | 417 | |
289 | 418 | # Write layer with null geometry out to file |
290 | 419 | tempfilename = os.path.join(str(tmpdir), "null_geom" + ext) |
291 | df_null.to_file(tempfilename, driver=driver) | |
292 | df_null.to_file(tempfilename, mode="a", driver=driver) | |
420 | df_null.to_file(tempfilename, driver=driver, engine=engine) | |
421 | df_null.to_file(tempfilename, mode="a", driver=driver, engine=engine) | |
293 | 422 | # Read layer back in |
294 | df = GeoDataFrame.from_file(tempfilename) | |
423 | df = GeoDataFrame.from_file(tempfilename, engine=engine) | |
295 | 424 | assert "geometry" in df |
296 | 425 | assert len(df) == (2 * 2) |
297 | 426 | expected = pd.concat([df_null] * 2, ignore_index=True) |
299 | 428 | |
300 | 429 | |
301 | 430 | @pytest.mark.parametrize("driver,ext", driver_ext_pairs) |
302 | def test_empty_crs(tmpdir, driver, ext): | |
431 | def test_empty_crs(tmpdir, driver, ext, engine): | |
303 | 432 | """Test handling of undefined CRS with GPKG driver (GH #1975).""" |
304 | 433 | if ext == ".gpkg": |
305 | 434 | pytest.xfail("GPKG is read with Undefined geographic SRS.") |
307 | 436 | tempfilename = os.path.join(str(tmpdir), "boros" + ext) |
308 | 437 | df = GeoDataFrame( |
309 | 438 | { |
310 | "a": [1, 2, 3], | |
439 | "a": [1.0, 2.0, 3.0], | |
311 | 440 | "geometry": [Point(0, 0), Point(1, 1), Point(2, 2)], |
312 | 441 | }, |
313 | 442 | ) |
314 | 443 | |
315 | df.to_file(tempfilename, driver=driver) | |
316 | result = read_file(tempfilename) | |
444 | df.to_file(tempfilename, driver=driver, engine=engine) | |
445 | result = read_file(tempfilename, engine=engine) | |
317 | 446 | |
318 | 447 | if ext == ".geojson": |
319 | 448 | # geojson by default assumes epsg:4326 |
327 | 456 | # ----------------------------------------------------------------------------- |
328 | 457 | |
329 | 458 | |
330 | with fiona.open(geopandas.datasets.get_path("nybb")) as f: | |
331 | CRS = f.crs["init"] if "init" in f.crs else f.crs_wkt | |
332 | NYBB_COLUMNS = list(f.meta["schema"]["properties"].keys()) | |
333 | ||
334 | ||
335 | def test_read_file(df_nybb): | |
336 | df = df_nybb.rename(columns=lambda x: x.lower()) | |
459 | NYBB_CRS = "epsg:2263" | |
460 | ||
461 | ||
462 | def test_read_file(engine): | |
463 | df = read_file(geopandas.datasets.get_path("nybb"), engine=engine) | |
337 | 464 | validate_boro_df(df) |
338 | assert df.crs == CRS | |
339 | # get lower case columns, and exclude geometry column from comparison | |
340 | lower_columns = [c.lower() for c in NYBB_COLUMNS] | |
341 | assert (df.columns[:-1] == lower_columns).all() | |
465 | assert df.crs == NYBB_CRS | |
466 | expected_columns = ["BoroCode", "BoroName", "Shape_Leng", "Shape_Area"] | |
467 | assert (df.columns[:-1] == expected_columns).all() | |
342 | 468 | |
343 | 469 | |
344 | 470 | @pytest.mark.web |
345 | def test_read_file_remote_geojson_url(): | |
471 | def test_read_file_remote_geojson_url(engine): | |
346 | 472 | url = ( |
347 | 473 | "https://raw.githubusercontent.com/geopandas/geopandas/" |
348 | "master/geopandas/tests/data/null_geom.geojson" | |
349 | ) | |
350 | gdf = read_file(url) | |
474 | "main/geopandas/tests/data/null_geom.geojson" | |
475 | ) | |
476 | gdf = read_file(url, engine=engine) | |
351 | 477 | assert isinstance(gdf, geopandas.GeoDataFrame) |
352 | 478 | |
353 | 479 | |
354 | 480 | @pytest.mark.web |
355 | def test_read_file_remote_zipfile_url(): | |
481 | def test_read_file_remote_zipfile_url(engine): | |
356 | 482 | url = ( |
357 | 483 | "https://raw.githubusercontent.com/geopandas/geopandas/" |
358 | "master/geopandas/datasets/nybb_16a.zip" | |
359 | ) | |
360 | gdf = read_file(url) | |
484 | "main/geopandas/datasets/nybb_16a.zip" | |
485 | ) | |
486 | gdf = read_file(url, engine=engine) | |
361 | 487 | assert isinstance(gdf, geopandas.GeoDataFrame) |
362 | 488 | |
363 | 489 | |
364 | def test_read_file_textio(file_path): | |
490 | def test_read_file_textio(file_path, engine): | |
365 | 491 | file_text_stream = open(file_path) |
366 | 492 | file_stringio = io.StringIO(open(file_path).read()) |
367 | gdf_text_stream = read_file(file_text_stream) | |
368 | gdf_stringio = read_file(file_stringio) | |
493 | gdf_text_stream = read_file(file_text_stream, engine=engine) | |
494 | gdf_stringio = read_file(file_stringio, engine=engine) | |
369 | 495 | assert isinstance(gdf_text_stream, geopandas.GeoDataFrame) |
370 | 496 | assert isinstance(gdf_stringio, geopandas.GeoDataFrame) |
371 | 497 | |
372 | 498 | |
373 | def test_read_file_bytesio(file_path): | |
499 | def test_read_file_bytesio(file_path, engine): | |
374 | 500 | file_binary_stream = open(file_path, "rb") |
375 | 501 | file_bytesio = io.BytesIO(open(file_path, "rb").read()) |
376 | gdf_binary_stream = read_file(file_binary_stream) | |
377 | gdf_bytesio = read_file(file_bytesio) | |
502 | gdf_binary_stream = read_file(file_binary_stream, engine=engine) | |
503 | gdf_bytesio = read_file(file_bytesio, engine=engine) | |
378 | 504 | assert isinstance(gdf_binary_stream, geopandas.GeoDataFrame) |
379 | 505 | assert isinstance(gdf_bytesio, geopandas.GeoDataFrame) |
380 | 506 | |
381 | 507 | |
382 | def test_read_file_raw_stream(file_path): | |
508 | def test_read_file_raw_stream(file_path, engine): | |
383 | 509 | file_raw_stream = open(file_path, "rb", buffering=0) |
384 | gdf_raw_stream = read_file(file_raw_stream) | |
510 | gdf_raw_stream = read_file(file_raw_stream, engine=engine) | |
385 | 511 | assert isinstance(gdf_raw_stream, geopandas.GeoDataFrame) |
386 | 512 | |
387 | 513 | |
388 | def test_read_file_pathlib(file_path): | |
514 | def test_read_file_pathlib(file_path, engine): | |
389 | 515 | path_object = pathlib.Path(file_path) |
390 | gdf_path_object = read_file(path_object) | |
516 | gdf_path_object = read_file(path_object, engine=engine) | |
391 | 517 | assert isinstance(gdf_path_object, geopandas.GeoDataFrame) |
392 | 518 | |
393 | 519 | |
394 | def test_read_file_tempfile(): | |
520 | def test_read_file_tempfile(engine): | |
395 | 521 | temp = tempfile.TemporaryFile() |
396 | 522 | temp.write( |
397 | 523 | b""" |
408 | 534 | """ |
409 | 535 | ) |
410 | 536 | temp.seek(0) |
411 | gdf_tempfile = geopandas.read_file(temp) | |
537 | gdf_tempfile = geopandas.read_file(temp, engine=engine) | |
412 | 538 | assert isinstance(gdf_tempfile, geopandas.GeoDataFrame) |
413 | 539 | temp.close() |
414 | 540 | |
415 | 541 | |
416 | def test_read_binary_file_fsspec(): | |
542 | def test_read_binary_file_fsspec(engine): | |
417 | 543 | fsspec = pytest.importorskip("fsspec") |
418 | 544 | # Remove the zip scheme so fsspec doesn't open as a zipped file, |
419 | 545 | # instead we want to read as bytes and let fiona decode it. |
420 | 546 | path = geopandas.datasets.get_path("nybb")[6:] |
421 | 547 | with fsspec.open(path, "rb") as f: |
422 | gdf = read_file(f) | |
548 | gdf = read_file(f, engine=engine) | |
423 | 549 | assert isinstance(gdf, geopandas.GeoDataFrame) |
424 | 550 | |
425 | 551 | |
426 | def test_read_text_file_fsspec(file_path): | |
552 | def test_read_text_file_fsspec(file_path, engine): | |
427 | 553 | fsspec = pytest.importorskip("fsspec") |
428 | 554 | with fsspec.open(file_path, "r") as f: |
429 | gdf = read_file(f) | |
555 | gdf = read_file(f, engine=engine) | |
430 | 556 | assert isinstance(gdf, geopandas.GeoDataFrame) |
431 | 557 | |
432 | 558 | |
433 | def test_infer_zipped_file(): | |
559 | def test_infer_zipped_file(engine): | |
434 | 560 | # Remove the zip scheme so that the test for a zipped file can |
435 | 561 | # check it and add it back. |
436 | 562 | path = geopandas.datasets.get_path("nybb")[6:] |
437 | gdf = read_file(path) | |
563 | gdf = read_file(path, engine=engine) | |
438 | 564 | assert isinstance(gdf, geopandas.GeoDataFrame) |
439 | 565 | |
440 | 566 | # Check that it can successfully add a zip scheme to a path that already has a |
441 | 567 | # scheme |
442 | gdf = read_file("file+file://" + path) | |
568 | gdf = read_file("file+file://" + path, engine=engine) | |
443 | 569 | assert isinstance(gdf, geopandas.GeoDataFrame) |
444 | 570 | |
445 | 571 | # Check that it can add a zip scheme for a path that includes a subpath |
446 | 572 | # within the archive. |
447 | gdf = read_file(path + "!nybb.shp") | |
573 | gdf = read_file(path + "!nybb.shp", engine=engine) | |
448 | 574 | assert isinstance(gdf, geopandas.GeoDataFrame) |
449 | 575 | |
450 | 576 | |
451 | def test_allow_legacy_gdal_path(): | |
577 | def test_allow_legacy_gdal_path(engine): | |
452 | 578 | # Construct a GDAL-style zip path. |
453 | 579 | path = "/vsizip/" + geopandas.datasets.get_path("nybb")[6:] |
454 | gdf = read_file(path) | |
580 | gdf = read_file(path, engine=engine) | |
455 | 581 | assert isinstance(gdf, geopandas.GeoDataFrame) |
456 | 582 | |
457 | 583 | |
458 | def test_read_file_filtered__bbox(df_nybb): | |
584 | def test_read_file_filtered__bbox(df_nybb, engine): | |
459 | 585 | nybb_filename = geopandas.datasets.get_path("nybb") |
460 | 586 | bbox = ( |
461 | 587 | 1031051.7879884212, |
463 | 589 | 1047224.3104931959, |
464 | 590 | 244317.30894023244, |
465 | 591 | ) |
466 | filtered_df = read_file(nybb_filename, bbox=bbox) | |
592 | filtered_df = read_file(nybb_filename, bbox=bbox, engine=engine) | |
467 | 593 | expected = df_nybb[df_nybb["BoroName"].isin(["Bronx", "Queens"])] |
468 | 594 | assert_geodataframe_equal(filtered_df, expected.reset_index(drop=True)) |
469 | 595 | |
470 | 596 | |
471 | def test_read_file_filtered__bbox__polygon(df_nybb): | |
597 | def test_read_file_filtered__bbox__polygon(df_nybb, engine): | |
472 | 598 | nybb_filename = geopandas.datasets.get_path("nybb") |
473 | 599 | bbox = box( |
474 | 600 | 1031051.7879884212, 224272.49231459625, 1047224.3104931959, 244317.30894023244 |
475 | 601 | ) |
476 | filtered_df = read_file(nybb_filename, bbox=bbox) | |
602 | filtered_df = read_file(nybb_filename, bbox=bbox, engine=engine) | |
477 | 603 | expected = df_nybb[df_nybb["BoroName"].isin(["Bronx", "Queens"])] |
478 | 604 | assert_geodataframe_equal(filtered_df, expected.reset_index(drop=True)) |
479 | 605 | |
480 | 606 | |
481 | def test_read_file_filtered__rows(df_nybb): | |
607 | def test_read_file_filtered__rows(df_nybb, engine): | |
482 | 608 | nybb_filename = geopandas.datasets.get_path("nybb") |
483 | filtered_df = read_file(nybb_filename, rows=1) | |
609 | filtered_df = read_file(nybb_filename, rows=1, engine=engine) | |
484 | 610 | assert_geodataframe_equal(filtered_df, df_nybb.iloc[[0], :]) |
485 | 611 | |
486 | 612 | |
487 | def test_read_file_filtered__rows_slice(df_nybb): | |
613 | def test_read_file_filtered__rows_slice(df_nybb, engine): | |
488 | 614 | nybb_filename = geopandas.datasets.get_path("nybb") |
489 | filtered_df = read_file(nybb_filename, rows=slice(1, 3)) | |
615 | filtered_df = read_file(nybb_filename, rows=slice(1, 3), engine=engine) | |
490 | 616 | assert_geodataframe_equal(filtered_df, df_nybb.iloc[1:3, :].reset_index(drop=True)) |
491 | 617 | |
492 | 618 | |
493 | 619 | @pytest.mark.filterwarnings( |
494 | 620 | "ignore:Layer does not support OLC_FASTFEATURECOUNT:RuntimeWarning" |
495 | 621 | ) # for the slice with -1 |
496 | def test_read_file_filtered__rows_bbox(df_nybb): | |
622 | def test_read_file_filtered__rows_bbox(df_nybb, engine): | |
497 | 623 | nybb_filename = geopandas.datasets.get_path("nybb") |
498 | 624 | bbox = ( |
499 | 625 | 1031051.7879884212, |
501 | 627 | 1047224.3104931959, |
502 | 628 | 244317.30894023244, |
503 | 629 | ) |
504 | # combination bbox and rows (rows slice applied after bbox filtering!) | |
505 | filtered_df = read_file(nybb_filename, bbox=bbox, rows=slice(4, None)) | |
506 | assert filtered_df.empty | |
507 | filtered_df = read_file(nybb_filename, bbox=bbox, rows=slice(-1, None)) | |
508 | assert_geodataframe_equal(filtered_df, df_nybb.iloc[4:, :].reset_index(drop=True)) | |
509 | ||
510 | ||
511 | def test_read_file_filtered_rows_invalid(): | |
630 | if engine == "pyogrio": | |
631 | with pytest.raises(ValueError, match="'skip_features' must be between 0 and 1"): | |
632 | # combination bbox and rows (rows slice applied after bbox filtering!) | |
633 | filtered_df = read_file( | |
634 | nybb_filename, bbox=bbox, rows=slice(4, None), engine=engine | |
635 | ) | |
636 | else: # fiona | |
637 | # combination bbox and rows (rows slice applied after bbox filtering!) | |
638 | filtered_df = read_file( | |
639 | nybb_filename, bbox=bbox, rows=slice(4, None), engine=engine | |
640 | ) | |
641 | assert filtered_df.empty | |
642 | ||
643 | if engine == "pyogrio": | |
644 | # TODO: support negative rows in pyogrio | |
645 | with pytest.raises(ValueError, match="'skip_features' must be between 0 and 1"): | |
646 | filtered_df = read_file( | |
647 | nybb_filename, bbox=bbox, rows=slice(-1, None), engine=engine | |
648 | ) | |
649 | else: | |
650 | filtered_df = read_file( | |
651 | nybb_filename, bbox=bbox, rows=slice(-1, None), engine=engine | |
652 | ) | |
653 | filtered_df["BoroCode"] = filtered_df["BoroCode"].astype("int64") | |
654 | assert_geodataframe_equal( | |
655 | filtered_df, df_nybb.iloc[4:, :].reset_index(drop=True) | |
656 | ) | |
657 | ||
658 | ||
659 | def test_read_file_filtered_rows_invalid(engine): | |
512 | 660 | with pytest.raises(TypeError): |
513 | read_file(geopandas.datasets.get_path("nybb"), rows="not_a_slice") | |
514 | ||
515 | ||
516 | def test_read_file__ignore_geometry(): | |
661 | read_file( | |
662 | geopandas.datasets.get_path("nybb"), rows="not_a_slice", engine=engine | |
663 | ) | |
664 | ||
665 | ||
666 | def test_read_file__ignore_geometry(engine): | |
517 | 667 | pdf = geopandas.read_file( |
518 | geopandas.datasets.get_path("naturalearth_lowres"), ignore_geometry=True | |
668 | geopandas.datasets.get_path("naturalearth_lowres"), | |
669 | ignore_geometry=True, | |
670 | engine=engine, | |
519 | 671 | ) |
520 | 672 | assert "geometry" not in pdf.columns |
521 | 673 | assert isinstance(pdf, pd.DataFrame) and not isinstance(pdf, geopandas.GeoDataFrame) |
522 | 674 | |
523 | 675 | |
524 | def test_read_file__ignore_all_fields(): | |
676 | def test_read_file__ignore_all_fields(engine): | |
677 | skip_pyogrio_not_supported(engine) # pyogrio has "columns" keyword instead | |
525 | 678 | gdf = geopandas.read_file( |
526 | 679 | geopandas.datasets.get_path("naturalearth_lowres"), |
527 | 680 | ignore_fields=["pop_est", "continent", "name", "iso_a3", "gdp_md_est"], |
681 | engine="fiona", | |
528 | 682 | ) |
529 | 683 | assert gdf.columns.tolist() == ["geometry"] |
530 | 684 | |
531 | 685 | |
532 | def test_read_file_filtered_with_gdf_boundary(df_nybb): | |
686 | @PYOGRIO_MARK | |
687 | def test_read_file__columns(): | |
688 | # TODO: this is only support for pyogrio, but we could mimic it for fiona as well | |
689 | gdf = geopandas.read_file( | |
690 | geopandas.datasets.get_path("naturalearth_lowres"), | |
691 | columns=["name", "pop_est"], | |
692 | engine="pyogrio", | |
693 | ) | |
694 | assert gdf.columns.tolist() == ["name", "pop_est", "geometry"] | |
695 | ||
696 | ||
697 | def test_read_file_filtered_with_gdf_boundary(df_nybb, engine): | |
533 | 698 | full_df_shape = df_nybb.shape |
534 | 699 | nybb_filename = geopandas.datasets.get_path("nybb") |
535 | 700 | bbox = geopandas.GeoDataFrame( |
541 | 706 | 244317.30894023244, |
542 | 707 | ) |
543 | 708 | ], |
544 | crs=CRS, | |
545 | ) | |
546 | filtered_df = read_file(nybb_filename, bbox=bbox) | |
709 | crs=NYBB_CRS, | |
710 | ) | |
711 | filtered_df = read_file(nybb_filename, bbox=bbox, engine=engine) | |
547 | 712 | filtered_df_shape = filtered_df.shape |
548 | 713 | assert full_df_shape != filtered_df_shape |
549 | 714 | assert filtered_df_shape == (2, 5) |
550 | 715 | |
551 | 716 | |
552 | def test_read_file_filtered_with_gdf_boundary__mask(df_nybb): | |
717 | def test_read_file_filtered_with_gdf_boundary__mask(df_nybb, engine): | |
718 | skip_pyogrio_not_supported(engine) | |
553 | 719 | gdf_mask = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres")) |
554 | 720 | gdf = geopandas.read_file( |
555 | 721 | geopandas.datasets.get_path("naturalearth_cities"), |
556 | 722 | mask=gdf_mask[gdf_mask.continent == "Africa"], |
723 | engine=engine, | |
557 | 724 | ) |
558 | 725 | filtered_df_shape = gdf.shape |
559 | 726 | assert filtered_df_shape == (50, 2) |
560 | 727 | |
561 | 728 | |
562 | def test_read_file_filtered_with_gdf_boundary__mask__polygon(df_nybb): | |
729 | def test_read_file_filtered_with_gdf_boundary__mask__polygon(df_nybb, engine): | |
730 | skip_pyogrio_not_supported(engine) | |
563 | 731 | full_df_shape = df_nybb.shape |
564 | 732 | nybb_filename = geopandas.datasets.get_path("nybb") |
565 | 733 | mask = box( |
566 | 734 | 1031051.7879884212, 224272.49231459625, 1047224.3104931959, 244317.30894023244 |
567 | 735 | ) |
568 | filtered_df = read_file(nybb_filename, mask=mask) | |
736 | filtered_df = read_file(nybb_filename, mask=mask, engine=engine) | |
569 | 737 | filtered_df_shape = filtered_df.shape |
570 | 738 | assert full_df_shape != filtered_df_shape |
571 | 739 | assert filtered_df_shape == (2, 5) |
572 | 740 | |
573 | 741 | |
574 | def test_read_file_filtered_with_gdf_boundary_mismatched_crs(df_nybb): | |
742 | def test_read_file_filtered_with_gdf_boundary_mismatched_crs(df_nybb, engine): | |
743 | skip_pyogrio_not_supported(engine) | |
575 | 744 | full_df_shape = df_nybb.shape |
576 | 745 | nybb_filename = geopandas.datasets.get_path("nybb") |
577 | 746 | bbox = geopandas.GeoDataFrame( |
583 | 752 | 244317.30894023244, |
584 | 753 | ) |
585 | 754 | ], |
586 | crs=CRS, | |
755 | crs=NYBB_CRS, | |
587 | 756 | ) |
588 | 757 | bbox.to_crs(epsg=4326, inplace=True) |
589 | filtered_df = read_file(nybb_filename, bbox=bbox) | |
758 | filtered_df = read_file(nybb_filename, bbox=bbox, engine=engine) | |
590 | 759 | filtered_df_shape = filtered_df.shape |
591 | 760 | assert full_df_shape != filtered_df_shape |
592 | 761 | assert filtered_df_shape == (2, 5) |
593 | 762 | |
594 | 763 | |
595 | def test_read_file_filtered_with_gdf_boundary_mismatched_crs__mask(df_nybb): | |
764 | def test_read_file_filtered_with_gdf_boundary_mismatched_crs__mask(df_nybb, engine): | |
765 | skip_pyogrio_not_supported(engine) | |
596 | 766 | full_df_shape = df_nybb.shape |
597 | 767 | nybb_filename = geopandas.datasets.get_path("nybb") |
598 | 768 | mask = geopandas.GeoDataFrame( |
604 | 774 | 244317.30894023244, |
605 | 775 | ) |
606 | 776 | ], |
607 | crs=CRS, | |
777 | crs=NYBB_CRS, | |
608 | 778 | ) |
609 | 779 | mask.to_crs(epsg=4326, inplace=True) |
610 | filtered_df = read_file(nybb_filename, mask=mask.geometry) | |
780 | filtered_df = read_file(nybb_filename, mask=mask.geometry, engine=engine) | |
611 | 781 | filtered_df_shape = filtered_df.shape |
612 | 782 | assert full_df_shape != filtered_df_shape |
613 | 783 | assert filtered_df_shape == (2, 5) |
614 | 784 | |
615 | 785 | |
616 | def test_read_file_empty_shapefile(tmpdir): | |
786 | @pytest.mark.filterwarnings( | |
787 | "ignore:Layer 'b'test_empty'' does not have any features:UserWarning" | |
788 | ) | |
789 | def test_read_file_empty_shapefile(tmpdir, engine): | |
790 | if engine == "pyogrio" and not fiona: | |
791 | pytest.skip("test requires fiona to work") | |
792 | from geopandas.io.file import fiona_env | |
617 | 793 | |
618 | 794 | # create empty shapefile |
619 | 795 | meta = { |
632 | 808 | with fiona.open(fname, "w", **meta) as _: # noqa |
633 | 809 | pass |
634 | 810 | |
635 | empty = read_file(fname) | |
811 | empty = read_file(fname, engine=engine) | |
636 | 812 | assert isinstance(empty, geopandas.GeoDataFrame) |
637 | 813 | assert all(empty.columns == ["A", "Z", "geometry"]) |
638 | 814 | |
639 | 815 | |
640 | 816 | def test_read_file_privacy(tmpdir, df_nybb): |
641 | with pytest.warns(DeprecationWarning): | |
817 | with pytest.warns(FutureWarning): | |
642 | 818 | geopandas.io.file.read_file(geopandas.datasets.get_path("nybb")) |
643 | 819 | |
644 | 820 | |
661 | 837 | @pytest.mark.parametrize( |
662 | 838 | "driver,ext", [("ESRI Shapefile", "shp"), ("GeoJSON", "geojson")] |
663 | 839 | ) |
664 | def test_write_index_to_file(tmpdir, df_points, driver, ext): | |
840 | def test_write_index_to_file(tmpdir, df_points, driver, ext, engine): | |
665 | 841 | fngen = FileNumber(tmpdir, "check", ext) |
666 | 842 | |
667 | 843 | def do_checks(df, index_is_used): |
690 | 866 | |
691 | 867 | # check GeoDataFrame with default index=None to autodetect |
692 | 868 | tempfilename = next(fngen) |
693 | df.to_file(tempfilename, driver=driver, index=None) | |
694 | df_check = read_file(tempfilename) | |
869 | df.to_file(tempfilename, driver=driver, index=None, engine=engine) | |
870 | df_check = read_file(tempfilename, engine=engine) | |
695 | 871 | if len(other_cols) == 0: |
696 | 872 | expected_cols = driver_col[:] |
697 | 873 | else: |
703 | 879 | |
704 | 880 | # similar check on GeoSeries with index=None |
705 | 881 | tempfilename = next(fngen) |
706 | df.geometry.to_file(tempfilename, driver=driver, index=None) | |
707 | df_check = read_file(tempfilename) | |
882 | df.geometry.to_file(tempfilename, driver=driver, index=None, engine=engine) | |
883 | df_check = read_file(tempfilename, engine=engine) | |
708 | 884 | if index_is_used: |
709 | 885 | expected_cols = index_cols + ["geometry"] |
710 | 886 | else: |
713 | 889 | |
714 | 890 | # check GeoDataFrame with index=True |
715 | 891 | tempfilename = next(fngen) |
716 | df.to_file(tempfilename, driver=driver, index=True) | |
717 | df_check = read_file(tempfilename) | |
892 | df.to_file(tempfilename, driver=driver, index=True, engine=engine) | |
893 | df_check = read_file(tempfilename, engine=engine) | |
718 | 894 | assert list(df_check.columns) == index_cols + other_cols + ["geometry"] |
719 | 895 | |
720 | 896 | # similar check on GeoSeries with index=True |
721 | 897 | tempfilename = next(fngen) |
722 | df.geometry.to_file(tempfilename, driver=driver, index=True) | |
723 | df_check = read_file(tempfilename) | |
898 | df.geometry.to_file(tempfilename, driver=driver, index=True, engine=engine) | |
899 | df_check = read_file(tempfilename, engine=engine) | |
724 | 900 | assert list(df_check.columns) == index_cols + ["geometry"] |
725 | 901 | |
726 | 902 | # check GeoDataFrame with index=False |
727 | 903 | tempfilename = next(fngen) |
728 | df.to_file(tempfilename, driver=driver, index=False) | |
729 | df_check = read_file(tempfilename) | |
904 | df.to_file(tempfilename, driver=driver, index=False, engine=engine) | |
905 | df_check = read_file(tempfilename, engine=engine) | |
730 | 906 | if len(other_cols) == 0: |
731 | 907 | expected_cols = driver_col + ["geometry"] |
732 | 908 | else: |
735 | 911 | |
736 | 912 | # similar check on GeoSeries with index=False |
737 | 913 | tempfilename = next(fngen) |
738 | df.geometry.to_file(tempfilename, driver=driver, index=False) | |
739 | df_check = read_file(tempfilename) | |
914 | df.geometry.to_file(tempfilename, driver=driver, index=False, engine=engine) | |
915 | df_check = read_file(tempfilename, engine=engine) | |
740 | 916 | assert list(df_check.columns) == driver_col + ["geometry"] |
741 | 917 | |
742 | 918 | return |
841 | 1017 | @pytest.mark.parametrize( |
842 | 1018 | "test_file", [(pathlib.Path("~/test_file.geojson")), "~/test_file.geojson"] |
843 | 1019 | ) |
844 | def test_write_read_file(test_file): | |
1020 | def test_write_read_file(test_file, engine): | |
845 | 1021 | gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs=_CRS) |
846 | 1022 | gdf.to_file(test_file, driver="GeoJSON") |
847 | df_json = geopandas.read_file(test_file) | |
1023 | df_json = geopandas.read_file(test_file, engine=engine) | |
848 | 1024 | assert_geodataframe_equal(gdf, df_json, check_crs=True) |
849 | 1025 | os.remove(os.path.expanduser(test_file)) |
13 | 13 | |
14 | 14 | from geopandas.testing import assert_geodataframe_equal |
15 | 15 | import pytest |
16 | ||
17 | from .test_file import FIONA_MARK, PYOGRIO_MARK | |
18 | ||
16 | 19 | |
17 | 20 | # Credit: Polygons below come from Montreal city Open Data portal |
18 | 21 | # http://donnees.ville.montreal.qc.ca/dataset/unites-evaluation-fonciere |
245 | 248 | return request.param |
246 | 249 | |
247 | 250 | |
248 | def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver): | |
251 | @pytest.fixture( | |
252 | params=[ | |
253 | pytest.param("fiona", marks=FIONA_MARK), | |
254 | pytest.param("pyogrio", marks=PYOGRIO_MARK), | |
255 | ] | |
256 | ) | |
257 | def engine(request): | |
258 | return request.param | |
259 | ||
260 | ||
261 | def test_to_file_roundtrip(tmpdir, geodataframe, ogr_driver, engine): | |
249 | 262 | output_file = os.path.join(str(tmpdir), "output_file") |
250 | 263 | |
251 | 264 | expected_error = _expected_error_on(geodataframe, ogr_driver) |
252 | 265 | if expected_error: |
253 | with pytest.raises(RuntimeError, match="Failed to write record"): | |
254 | geodataframe.to_file(output_file, driver=ogr_driver) | |
266 | with pytest.raises( | |
267 | RuntimeError, match="Failed to write record|Could not add feature to layer" | |
268 | ): | |
269 | geodataframe.to_file(output_file, driver=ogr_driver, engine=engine) | |
255 | 270 | else: |
256 | geodataframe.to_file(output_file, driver=ogr_driver) | |
257 | ||
258 | reloaded = geopandas.read_file(output_file) | |
271 | geodataframe.to_file(output_file, driver=ogr_driver, engine=engine) | |
272 | ||
273 | reloaded = geopandas.read_file(output_file, engine=engine) | |
274 | ||
275 | if ogr_driver == "GeoJSON" and engine == "pyogrio": | |
276 | # For GeoJSON files, the int64 column comes back as int32 | |
277 | reloaded["a"] = reloaded["a"].astype("int64") | |
259 | 278 | |
260 | 279 | assert_geodataframe_equal(geodataframe, reloaded, check_column_type="equiv") |
1 | 1 | See generate_legacy_storage_files.py for the creation of the legacy files. |
2 | 2 | |
3 | 3 | """ |
4 | from distutils.version import LooseVersion | |
4 | from contextlib import contextmanager | |
5 | from packaging.version import Version | |
5 | 6 | import glob |
6 | 7 | import os |
7 | 8 | import pathlib |
35 | 36 | return request.param |
36 | 37 | |
37 | 38 | |
38 | @pytest.fixture | |
39 | def with_use_pygeos_false(): | |
39 | @contextmanager | |
40 | def with_use_pygeos(option): | |
40 | 41 | orig = geopandas.options.use_pygeos |
41 | geopandas.options.use_pygeos = not orig | |
42 | yield | |
43 | geopandas.options.use_pygeos = orig | |
42 | geopandas.options.use_pygeos = option | |
43 | try: | |
44 | yield | |
45 | finally: | |
46 | geopandas.options.use_pygeos = orig | |
44 | 47 | |
45 | 48 | |
46 | 49 | @pytest.mark.skipif( |
47 | compat.USE_PYGEOS or (str(pyproj.__version__) < LooseVersion("2.4")), | |
50 | compat.USE_PYGEOS or (Version(pyproj.__version__) < Version("2.4")), | |
48 | 51 | reason=( |
49 | 52 | "pygeos-based unpickling currently only works for pygeos-written files; " |
50 | 53 | "old pyproj versions can't read pickles from newer pyproj versions" |
69 | 72 | assert isinstance(result.has_sindex, bool) |
70 | 73 | |
71 | 74 | |
72 | @pytest.mark.skipif(not compat.HAS_PYGEOS, reason="requires pygeos to test #1745") | |
73 | def test_pygeos_switch(tmpdir, with_use_pygeos_false): | |
74 | gdf_crs = geopandas.GeoDataFrame( | |
75 | def _create_gdf(): | |
76 | return geopandas.GeoDataFrame( | |
75 | 77 | {"a": [0.1, 0.2, 0.3], "geometry": [Point(1, 1), Point(2, 2), Point(3, 3)]}, |
76 | 78 | crs="EPSG:4326", |
77 | 79 | ) |
78 | path = str(tmpdir / "gdf_crs.pickle") | |
79 | gdf_crs.to_pickle(path) | |
80 | result = pd.read_pickle(path) | |
81 | assert_geodataframe_equal(result, gdf_crs) | |
80 | ||
81 | ||
82 | @pytest.mark.skipif(not compat.HAS_PYGEOS, reason="requires pygeos to test #1745") | |
83 | def test_pygeos_switch(tmpdir): | |
84 | # writing and reading with pygeos disabled | |
85 | with with_use_pygeos(False): | |
86 | gdf = _create_gdf() | |
87 | path = str(tmpdir / "gdf_crs1.pickle") | |
88 | gdf.to_pickle(path) | |
89 | result = pd.read_pickle(path) | |
90 | assert_geodataframe_equal(result, gdf) | |
91 | ||
92 | # writing without pygeos, reading with pygeos | |
93 | with with_use_pygeos(False): | |
94 | gdf = _create_gdf() | |
95 | path = str(tmpdir / "gdf_crs1.pickle") | |
96 | gdf.to_pickle(path) | |
97 | ||
98 | with with_use_pygeos(True): | |
99 | result = pd.read_pickle(path) | |
100 | gdf = _create_gdf() | |
101 | assert_geodataframe_equal(result, gdf) | |
102 | ||
103 | # writing with pygeos, reading without pygeos | |
104 | with with_use_pygeos(True): | |
105 | gdf = _create_gdf() | |
106 | path = str(tmpdir / "gdf_crs1.pickle") | |
107 | gdf.to_pickle(path) | |
108 | ||
109 | with with_use_pygeos(False): | |
110 | result = pd.read_pickle(path) | |
111 | gdf = _create_gdf() | |
112 | assert_geodataframe_equal(result, gdf) |
25 | 25 | @pytest.fixture() |
26 | 26 | def connection_postgis(): |
27 | 27 | """ |
28 | Initiaties a connection to a postGIS database that must already exist. | |
28 | Initiates a connection to a postGIS database that must already exist. | |
29 | 29 | See create_postgis for more information. |
30 | 30 | """ |
31 | 31 | psycopg2 = pytest.importorskip("psycopg2") |
50 | 50 | @pytest.fixture() |
51 | 51 | def engine_postgis(): |
52 | 52 | """ |
53 | Initiaties a connection engine to a postGIS database that must already exist. | |
53 | Initiates a connection engine to a postGIS database that must already exist. | |
54 | 54 | """ |
55 | 55 | sqlalchemy = pytest.importorskip("sqlalchemy") |
56 | 56 | from sqlalchemy.engine.url import URL |
325 | 325 | create_postgis(con, df_nybb) |
326 | 326 | |
327 | 327 | sql = "SELECT * FROM nybb;" |
328 | with pytest.warns(DeprecationWarning): | |
328 | with pytest.warns(FutureWarning): | |
329 | 329 | geopandas.io.sql.read_postgis(sql, con) |
330 | 330 | |
331 | 331 | def test_write_postgis_default(self, engine_postgis, df_nybb): |
457 | 457 | ).fetchone()[0] |
458 | 458 | assert target_srid == 0, "SRID should be 0, found %s" % target_srid |
459 | 459 | |
460 | def test_write_postgis_with_esri_authority(self, engine_postgis, df_nybb): | |
461 | """ | |
462 | Tests that GeoDataFrame can be written to PostGIS with ESRI Authority | |
463 | CRS information (GH #2414). | |
464 | """ | |
465 | engine = engine_postgis | |
466 | ||
467 | table = "nybb" | |
468 | ||
469 | # Write to db | |
470 | df_nybb_esri = df_nybb.to_crs("ESRI:102003") | |
471 | write_postgis(df_nybb_esri, con=engine, name=table, if_exists="replace") | |
472 | # Validate that srid is 102003 | |
473 | target_srid = engine.execute( | |
474 | "SELECT Find_SRID('{schema}', '{table}', '{geom_col}');".format( | |
475 | schema="public", table=table, geom_col="geometry" | |
476 | ) | |
477 | ).fetchone()[0] | |
478 | assert target_srid == 102003, "SRID should be 102003, found %s" % target_srid | |
479 | ||
460 | 480 | def test_write_postgis_geometry_collection( |
461 | 481 | self, engine_postgis, df_geom_collection |
462 | 482 | ): |
5 | 5 | |
6 | 6 | import geopandas |
7 | 7 | |
8 | from distutils.version import LooseVersion | |
8 | from packaging.version import Version | |
9 | 9 | |
10 | 10 | from ._decorator import doc |
11 | 11 | |
12 | 12 | |
13 | def deprecated(new): | |
13 | def deprecated(new, warning_type=FutureWarning): | |
14 | 14 | """Helper to provide deprecation warning.""" |
15 | 15 | |
16 | 16 | def old(*args, **kwargs): |
17 | 17 | warnings.warn( |
18 | 18 | "{} is intended for internal ".format(new.__name__[1:]) |
19 | 19 | + "use only, and will be deprecated.", |
20 | DeprecationWarning, | |
20 | warning_type, | |
21 | 21 | stacklevel=2, |
22 | 22 | ) |
23 | 23 | new(*args, **kwargs) |
69 | 69 | from matplotlib.colors import is_color_like |
70 | 70 | from typing import Iterable |
71 | 71 | |
72 | mpl = matplotlib.__version__ | |
73 | if mpl >= LooseVersion("3.4") or (mpl > LooseVersion("3.3.2") and "+" in mpl): | |
72 | mpl = Version(matplotlib.__version__) | |
73 | if mpl >= Version("3.4"): | |
74 | 74 | # alpha is supported as array argument with matplotlib 3.4+ |
75 | 75 | scalar_kwargs = ["marker", "path_effects"] |
76 | 76 | else: |
324 | 324 | ---------- |
325 | 325 | s : Series |
326 | 326 | The GeoSeries to be plotted. Currently Polygon, |
327 | MultiPolygon, LineString, MultiLineString and Point | |
327 | MultiPolygon, LineString, MultiLineString, Point and MultiPoint | |
328 | 328 | geometries can be plotted. |
329 | 329 | cmap : str (default None) |
330 | 330 | The name of a colormap recognized by matplotlib. Any |
334 | 334 | |
335 | 335 | tab10, tab20, Accent, Dark2, Paired, Pastel1, Set1, Set2 |
336 | 336 | |
337 | color : str (default None) | |
337 | color : str, np.array, pd.Series, List (default None) | |
338 | 338 | If specified, all objects will be colored uniformly. |
339 | 339 | ax : matplotlib.pyplot.Artist (default None) |
340 | 340 | axes on which to draw the plot |
413 | 413 | ) |
414 | 414 | return ax |
415 | 415 | |
416 | # have colors been given for all geometries? | |
417 | color_given = pd.api.types.is_list_like(color) and len(color) == len(s) | |
418 | ||
416 | 419 | # if cmap is specified, create range of colors based on cmap |
417 | 420 | values = None |
418 | 421 | if cmap is not None: |
425 | 428 | # decompose GeometryCollections |
426 | 429 | geoms, multiindex = _flatten_multi_geoms(s.geometry, prefix="Geom") |
427 | 430 | values = np.take(values, multiindex, axis=0) if cmap else None |
431 | # ensure indexes are consistent | |
432 | if color_given and isinstance(color, pd.Series): | |
433 | color = color.reindex(s.index) | |
434 | expl_color = np.take(color, multiindex, axis=0) if color_given else color | |
428 | 435 | expl_series = geopandas.GeoSeries(geoms) |
429 | 436 | |
430 | 437 | geom_types = expl_series.type |
442 | 449 | # color overrides both face and edgecolor. As we want people to be |
443 | 450 | # able to use edgecolor as well, pass color to facecolor |
444 | 451 | facecolor = style_kwds.pop("facecolor", None) |
452 | color_ = expl_color[poly_idx] if color_given else color | |
445 | 453 | if color is not None: |
446 | facecolor = color | |
454 | facecolor = color_ | |
447 | 455 | |
448 | 456 | values_ = values[poly_idx] if cmap else None |
449 | 457 | _plot_polygon_collection( |
454 | 462 | lines = expl_series[line_idx] |
455 | 463 | if not lines.empty: |
456 | 464 | values_ = values[line_idx] if cmap else None |
465 | color_ = expl_color[line_idx] if color_given else color | |
466 | ||
457 | 467 | _plot_linestring_collection( |
458 | ax, lines, values_, color=color, cmap=cmap, **style_kwds | |
468 | ax, lines, values_, color=color_, cmap=cmap, **style_kwds | |
459 | 469 | ) |
460 | 470 | |
461 | 471 | # plot all Points in the same collection |
462 | 472 | points = expl_series[point_idx] |
463 | 473 | if not points.empty: |
464 | 474 | values_ = values[point_idx] if cmap else None |
475 | color_ = expl_color[point_idx] if color_given else color | |
476 | ||
465 | 477 | _plot_point_collection( |
466 | ax, points, values_, color=color, cmap=cmap, **style_kwds | |
478 | ax, points, values_, color=color_, cmap=cmap, **style_kwds | |
467 | 479 | ) |
468 | 480 | |
469 | 481 | plt.draw() |
523 | 535 | - 'hexbin' : hexbin plot. |
524 | 536 | cmap : str (default None) |
525 | 537 | The name of a colormap recognized by matplotlib. |
526 | color : str (default None) | |
538 | color : str, np.array, pd.Series (default None) | |
527 | 539 | If specified, all objects will be colored uniformly. |
528 | 540 | ax : matplotlib.pyplot.Artist (default None) |
529 | 541 | axes on which to draw the plot |
562 | 574 | Size of the resulting matplotlib.figure.Figure. If the argument |
563 | 575 | axes is given explicitly, figsize is ignored. |
564 | 576 | legend_kwds : dict (default None) |
565 | Keyword arguments to pass to matplotlib.pyplot.legend() or | |
566 | matplotlib.pyplot.colorbar(). | |
577 | Keyword arguments to pass to :func:`matplotlib.pyplot.legend` or | |
578 | :func:`matplotlib.pyplot.colorbar`. | |
567 | 579 | Additional accepted keywords when `scheme` is specified: |
568 | 580 | |
569 | 581 | fmt : string |
736 | 748 | except ImportError: |
737 | 749 | raise ImportError(mc_err) |
738 | 750 | |
739 | if mapclassify.__version__ < LooseVersion("2.4.0"): | |
751 | if Version(mapclassify.__version__) < Version("2.4.0"): | |
740 | 752 | raise ImportError(mc_err) |
741 | 753 | |
742 | 754 | if classification_kwds is None: |
859 | 871 | **style_kwds, |
860 | 872 | ) |
861 | 873 | |
862 | if missing_kwds is not None and not expl_series[nan_idx].empty: | |
874 | missing_data = not expl_series[nan_idx].empty | |
875 | if missing_kwds is not None and missing_data: | |
863 | 876 | if color: |
864 | 877 | if "color" not in missing_kwds: |
865 | 878 | missing_kwds["color"] = color |
901 | 914 | markeredgewidth=0, |
902 | 915 | ) |
903 | 916 | ) |
904 | if missing_kwds is not None: | |
917 | if missing_kwds is not None and missing_data: | |
905 | 918 | if "color" in merged_kwds: |
906 | 919 | merged_kwds["facecolor"] = merged_kwds["color"] |
907 | 920 | patches.append( |
452 | 452 | |
453 | 453 | # handle empty / invalid geometries |
454 | 454 | if geometry is None: |
455 | # return an empty integer array, similar to pygeys.STRtree.query. | |
455 | # return an empty integer array, similar to pygeos.STRtree.query. | |
456 | 456 | return np.array([], dtype=np.intp) |
457 | 457 | |
458 | 458 | if not isinstance(geometry, BaseGeometry): |
632 | 632 | |
633 | 633 | _PYGEOS_PREDICATES = {p.name for p in pygeos.strtree.BinaryPredicate} | set([None]) |
634 | 634 | |
635 | class PyGEOSSTRTreeIndex(pygeos.STRtree): | |
635 | class PyGEOSSTRTreeIndex(BaseSpatialIndex): | |
636 | 636 | """A simple wrapper around pygeos's STRTree. |
637 | 637 | |
638 | 638 | |
650 | 650 | non_empty = geometry.copy() |
651 | 651 | non_empty[pygeos.is_empty(non_empty)] = None |
652 | 652 | # set empty geometries to None to maintain indexing |
653 | super().__init__(non_empty) | |
653 | self._tree = pygeos.STRtree(non_empty) | |
654 | 654 | # store geometries, including empty geometries for user access |
655 | 655 | self.geometries = geometry.copy() |
656 | 656 | |
686 | 686 | if isinstance(geometry, BaseGeometry): |
687 | 687 | geometry = array._shapely_to_geom(geometry) |
688 | 688 | |
689 | matches = super().query(geometry=geometry, predicate=predicate) | |
689 | matches = self._tree.query(geometry=geometry, predicate=predicate) | |
690 | 690 | |
691 | 691 | if sort: |
692 | 692 | return np.sort(matches) |
739 | 739 | |
740 | 740 | geometry = self._as_geometry_array(geometry) |
741 | 741 | |
742 | res = super().query_bulk(geometry, predicate) | |
742 | res = self._tree.query_bulk(geometry, predicate) | |
743 | 743 | |
744 | 744 | if sort: |
745 | 745 | # sort by first array (geometry) and then second (tree) |
759 | 759 | geometry = self._as_geometry_array(geometry) |
760 | 760 | |
761 | 761 | if not return_all and max_distance is None and not return_distance: |
762 | return super().nearest(geometry) | |
763 | ||
764 | result = super().nearest_all( | |
762 | return self._tree.nearest(geometry) | |
763 | ||
764 | result = self._tree.nearest_all( | |
765 | 765 | geometry, max_distance=max_distance, return_distance=return_distance |
766 | 766 | ) |
767 | 767 | if return_distance: |
803 | 803 | |
804 | 804 | # need to convert tuple of bounds to a geometry object |
805 | 805 | if len(coordinates) == 4: |
806 | indexes = super().query(pygeos.box(*coordinates)) | |
806 | indexes = self._tree.query(pygeos.box(*coordinates)) | |
807 | 807 | elif len(coordinates) == 2: |
808 | indexes = super().query(pygeos.points(*coordinates)) | |
808 | indexes = self._tree.query(pygeos.points(*coordinates)) | |
809 | 809 | else: |
810 | 810 | raise TypeError( |
811 | 811 | "Invalid coordinates, must be iterable in format " |
818 | 818 | @property |
819 | 819 | @doc(BaseSpatialIndex.size) |
820 | 820 | def size(self): |
821 | return len(self) | |
821 | return len(self._tree) | |
822 | 822 | |
823 | 823 | @property |
824 | 824 | @doc(BaseSpatialIndex.is_empty) |
825 | 825 | def is_empty(self): |
826 | return len(self) == 0 | |
826 | return len(self._tree) == 0 | |
827 | ||
828 | def __len__(self): | |
829 | return len(self._tree) |
315 | 315 | ) |
316 | 316 | |
317 | 317 | # geometry comparison |
318 | for col, dtype in left.dtypes.iteritems(): | |
318 | for col, dtype in left.dtypes.items(): | |
319 | 319 | if isinstance(dtype, GeometryDtype): |
320 | 320 | assert_geoseries_equal( |
321 | 321 | left[col], |
0 | 0 | import subprocess |
1 | 1 | import sys |
2 | ||
3 | from geopandas._compat import PANDAS_GE_10 | |
4 | 2 | |
5 | 3 | |
6 | 4 | def test_no_additional_imports(): |
19 | 17 | "psycopg2", |
20 | 18 | "geopy", |
21 | 19 | "geoalchemy2", |
20 | "matplotlib", | |
22 | 21 | } |
23 | if PANDAS_GE_10: | |
24 | # pandas > 0.25 stopped importing matplotlib by default | |
25 | blacklist.add("matplotlib") | |
26 | 22 | |
27 | 23 | code = """ |
28 | 24 | import sys |
9 | 9 | import shapely.geometry |
10 | 10 | from shapely.geometry.base import CAP_STYLE, JOIN_STYLE |
11 | 11 | import shapely.wkb |
12 | import shapely.wkt | |
12 | 13 | from shapely._buildcfg import geos_version |
13 | 14 | |
14 | 15 | import geopandas |
31 | 32 | shapely.geometry.Polygon([(random.random(), random.random()) for i in range(3)]) |
32 | 33 | for _ in range(10) |
33 | 34 | ] |
34 | triangles = triangle_no_missing + [shapely.geometry.Polygon(), None] | |
35 | triangles = triangle_no_missing + [shapely.wkt.loads("POLYGON EMPTY"), None] | |
35 | 36 | T = from_shapely(triangles) |
36 | 37 | |
37 | 38 | points_no_missing = [ |
142 | 143 | missing_values = [None] |
143 | 144 | if not compat.USE_PYGEOS: |
144 | 145 | missing_values.extend([b"", np.nan]) |
145 | ||
146 | if compat.PANDAS_GE_10: | |
147 | missing_values.append(pd.NA) | |
146 | missing_values.append(pd.NA) | |
148 | 147 | |
149 | 148 | res = from_wkb(missing_values) |
150 | 149 | np.testing.assert_array_equal(res, np.full(len(missing_values), None)) |
204 | 203 | L_wkt = [f(p.wkt) for p in points_no_missing] |
205 | 204 | res = from_wkt(L_wkt) |
206 | 205 | assert isinstance(res, GeometryArray) |
207 | assert all(v.almost_equals(t) for v, t in zip(res, points_no_missing)) | |
206 | tol = 0.5 * 10 ** (-6) | |
207 | assert all(v.equals_exact(t, tolerance=tol) for v, t in zip(res, points_no_missing)) | |
208 | assert all(v.equals_exact(t, tolerance=tol) for v, t in zip(res, points_no_missing)) | |
208 | 209 | |
209 | 210 | # array |
210 | 211 | res = from_wkt(np.array(L_wkt, dtype=object)) |
211 | 212 | assert isinstance(res, GeometryArray) |
212 | assert all(v.almost_equals(t) for v, t in zip(res, points_no_missing)) | |
213 | assert all(v.equals_exact(t, tolerance=tol) for v, t in zip(res, points_no_missing)) | |
213 | 214 | |
214 | 215 | # missing values |
215 | 216 | # TODO(pygeos) does not support empty strings, np.nan, or pd.NA |
216 | 217 | missing_values = [None] |
217 | 218 | if not compat.USE_PYGEOS: |
218 | 219 | missing_values.extend([f(""), np.nan]) |
219 | ||
220 | if compat.PANDAS_GE_10: | |
221 | missing_values.append(pd.NA) | |
220 | missing_values.append(pd.NA) | |
222 | 221 | |
223 | 222 | res = from_wkb(missing_values) |
224 | 223 | np.testing.assert_array_equal(res, np.full(len(missing_values), None)) |
340 | 339 | |
341 | 340 | |
342 | 341 | @pytest.mark.parametrize( |
343 | "attr,args", [("equals_exact", (0.1,)), ("almost_equals", (3,))] | |
344 | ) | |
345 | def test_equals_deprecation(attr, args): | |
346 | point = points[0] | |
347 | tri = triangles[0] | |
348 | ||
349 | for other in [point, tri, shapely.geometry.Polygon()]: | |
350 | with pytest.warns(FutureWarning): | |
351 | result = getattr(T, attr)(other, *args) | |
352 | assert result.tolist() == getattr(T, "geom_" + attr)(other, *args).tolist() | |
353 | ||
354 | ||
355 | @pytest.mark.parametrize( | |
356 | 342 | "attr", |
357 | 343 | [ |
358 | 344 | "boundary", |
366 | 352 | def test_unary_geo(attr): |
367 | 353 | na_value = None |
368 | 354 | |
369 | if attr == "boundary": | |
370 | # pygeos returns None for empty geometries | |
371 | if not compat.USE_PYGEOS: | |
372 | # boundary raises for empty geometry | |
373 | with pytest.raises(Exception): | |
374 | T.boundary | |
375 | ||
376 | values = triangle_no_missing + [None] | |
377 | A = from_shapely(values) | |
378 | else: | |
379 | values = triangles | |
380 | A = T | |
381 | ||
382 | result = getattr(A, attr) | |
383 | if attr == "exterior" and compat.USE_PYGEOS: | |
384 | # TODO(pygeos) | |
385 | # empty Polygon() has an exterior with shapely > 1.7, which gives | |
386 | # empty LinearRing instead of None, | |
387 | # but conversion to pygeos still results in empty GeometryCollection | |
388 | expected = [ | |
389 | getattr(t, attr) if t is not None and not t.is_empty else na_value | |
390 | for t in values | |
391 | ] | |
392 | else: | |
393 | expected = [getattr(t, attr) if t is not None else na_value for t in values] | |
355 | result = getattr(T, attr) | |
356 | expected = [getattr(t, attr) if t is not None else na_value for t in triangles] | |
394 | 357 | |
395 | 358 | assert equal_geometries(result, expected) |
396 | 359 | |
465 | 428 | "has_z", |
466 | 429 | # for is_ring we raise a warning about the value for Polygon changing |
467 | 430 | pytest.param( |
468 | "is_ring", marks=pytest.mark.filterwarnings("ignore:is_ring:FutureWarning") | |
431 | "is_ring", | |
432 | marks=[ | |
433 | pytest.mark.filterwarnings("ignore:is_ring:FutureWarning"), | |
434 | ], | |
469 | 435 | ), |
470 | 436 | ], |
471 | 437 | ) |
483 | 449 | |
484 | 450 | result = getattr(V, attr) |
485 | 451 | |
486 | if attr == "is_simple" and (geos_version < (3, 8) or compat.USE_PYGEOS): | |
452 | if attr == "is_simple" and geos_version < (3, 8): | |
487 | 453 | # poly.is_simple raises an error for empty polygon for GEOS < 3.8 |
488 | 454 | # with shapely, pygeos always returns False for all GEOS versions |
489 | # But even for Shapely with GEOS >= 3.8, empty GeometryCollection | |
490 | # returns True instead of False | |
491 | 455 | expected = [ |
492 | 456 | getattr(t, attr) if t is not None and not t.is_empty else na_value |
493 | 457 | for t in vals |
499 | 463 | else na_value |
500 | 464 | for t in vals |
501 | 465 | ] |
466 | # empty Linearring.is_ring gives False with Shapely < 2.0 | |
467 | if compat.USE_PYGEOS and not compat.SHAPELY_GE_20: | |
468 | expected[-2] = True | |
469 | elif ( | |
470 | attr == "is_closed" | |
471 | and compat.USE_PYGEOS | |
472 | and compat.SHAPELY_GE_182 | |
473 | and not compat.SHAPELY_GE_20 | |
474 | ): | |
475 | # In shapely 1.8.2, is_closed was changed to return always True for | |
476 | # Polygon/MultiPolygon, while PyGEOS returns always False | |
477 | expected = [False] * len(vals) | |
502 | 478 | else: |
503 | 479 | expected = [getattr(t, attr) if t is not None else na_value for t in vals] |
480 | ||
504 | 481 | assert result.tolist() == expected |
505 | 482 | |
506 | 483 | |
512 | 489 | shapely.geometry.LineString([(0, 0), (1, 1), (1, -1)]), |
513 | 490 | shapely.geometry.LineString([(0, 0), (1, 1), (1, -1), (0, 0)]), |
514 | 491 | shapely.geometry.Polygon([(0, 0), (1, 1), (1, -1)]), |
515 | shapely.geometry.Polygon(), | |
492 | shapely.wkt.loads("POLYGON EMPTY"), | |
516 | 493 | None, |
517 | 494 | ] |
518 | expected = [True, False, True, True, False, False] | |
495 | expected = [True, False, True, True, True, False] | |
496 | if not compat.USE_PYGEOS and not compat.SHAPELY_GE_20: | |
497 | # empty polygon is_ring gives False with Shapely < 2.0 | |
498 | expected[-2] = False | |
519 | 499 | |
520 | 500 | result = from_shapely(g).is_ring |
521 | 501 | |
535 | 515 | def test_geom_types(): |
536 | 516 | cat = T.geom_type |
537 | 517 | # empty polygon has GeometryCollection type |
538 | assert list(cat) == ["Polygon"] * (len(T) - 2) + ["GeometryCollection", None] | |
518 | assert list(cat) == ["Polygon"] * (len(T) - 1) + [None] | |
539 | 519 | |
540 | 520 | |
541 | 521 | def test_geom_types_null_mixed(): |
783 | 763 | assert P5.equals(points[1]) |
784 | 764 | |
785 | 765 | |
766 | @pytest.mark.parametrize( | |
767 | "item", | |
768 | [ | |
769 | geopandas.GeoDataFrame( | |
770 | geometry=[shapely.geometry.Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])] | |
771 | ), | |
772 | geopandas.GeoSeries( | |
773 | [shapely.geometry.Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])] | |
774 | ), | |
775 | np.array([shapely.geometry.Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])]), | |
776 | [shapely.geometry.Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])], | |
777 | shapely.geometry.Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]), | |
778 | ], | |
779 | ) | |
780 | def test_setitem(item): | |
781 | points = [shapely.geometry.Point(i, i) for i in range(10)] | |
782 | P = from_shapely(points) | |
783 | ||
784 | P[[0]] = item | |
785 | ||
786 | assert isinstance(P[0], shapely.geometry.Polygon) | |
787 | ||
788 | ||
786 | 789 | def test_equality_ops(): |
787 | 790 | with pytest.raises(ValueError): |
788 | 791 | P[:5] == P[:7] |
903 | 906 | assert t1[0] is None |
904 | 907 | |
905 | 908 | |
906 | @pytest.mark.skipif(not compat.PANDAS_GE_10, reason="pd.NA introduced in pandas 1.0") | |
907 | 909 | def test_isna_pdNA(): |
908 | 910 | t1 = T.copy() |
909 | 911 | t1[0] = pd.NA |
939 | 941 | self.landmarks.estimate_utm_crs() |
940 | 942 | else: |
941 | 943 | assert self.landmarks.estimate_utm_crs() == CRS("EPSG:32618") |
942 | assert self.landmarks.estimate_utm_crs("NAD83") == CRS("EPSG:26918") | |
944 | if compat.PYPROJ_GE_32: # result is unstable in older pyproj | |
945 | assert self.landmarks.estimate_utm_crs("NAD83") == CRS("EPSG:26918") | |
943 | 946 | |
944 | 947 | @pytest.mark.skipif(compat.PYPROJ_LT_3, reason="requires pyproj 3 or higher") |
945 | 948 | def test_estimate_utm_crs__projected(self): |
0 | from distutils.version import LooseVersion | |
1 | import os | |
0 | from packaging.version import Version | |
2 | 1 | |
3 | 2 | import random |
4 | 3 | |
17 | 16 | |
18 | 17 | # pyproj 2.3.1 fixed a segfault for the case working in an environment with |
19 | 18 | # 'init' dicts (https://github.com/pyproj4/pyproj/issues/415) |
20 | PYPROJ_LT_231 = LooseVersion(pyproj.__version__) < LooseVersion("2.3.1") | |
19 | PYPROJ_LT_231 = Version(pyproj.__version__) < Version("2.3.1") | |
20 | PYPROJ_GE_3 = Version(pyproj.__version__) >= Version("3.0.0") | |
21 | 21 | |
22 | 22 | |
23 | 23 | def _create_df(x, y=None, crs=None): |
124 | 124 | # with PROJ >= 7, the transformation using EPSG code vs proj4 string is |
125 | 125 | # slightly different due to use of grid files or not -> turn off network |
126 | 126 | # to not use grid files at all for this test |
127 | os.environ["PROJ_NETWORK"] = "OFF" | |
127 | if PYPROJ_GE_3: | |
128 | pyproj.network.set_network_enabled(False) | |
129 | ||
128 | 130 | df = df_epsg26918() |
129 | 131 | lonlat = df.to_crs(**epsg4326) |
130 | 132 | utm = lonlat.to_crs(**epsg26918) |
199 | 201 | assert s.crs == self.osgb |
200 | 202 | assert s.values.crs == self.osgb |
201 | 203 | |
202 | with pytest.warns(FutureWarning): | |
204 | with pytest.raises( | |
205 | ValueError, | |
206 | match="CRS mismatch between CRS of the passed geometries and 'crs'", | |
207 | ): | |
203 | 208 | s = GeoSeries(arr, crs=4326) |
204 | 209 | assert s.crs == self.osgb |
205 | 210 | |
206 | @pytest.mark.filterwarnings("ignore:Assigning CRS") | |
207 | 211 | def test_dataframe(self): |
208 | 212 | arr = from_shapely(self.geoms, crs=27700) |
209 | 213 | df = GeoDataFrame(geometry=arr) |
218 | 222 | assert df.geometry.crs == self.osgb |
219 | 223 | assert df.geometry.values.crs == self.osgb |
220 | 224 | |
221 | # different passed CRS than array CRS is ignored | |
222 | with pytest.warns(FutureWarning, match="CRS mismatch"): | |
225 | # different passed CRS than array CRS is now an error | |
226 | match_str = "CRS mismatch between CRS of the passed geometries and 'crs'" | |
227 | with pytest.raises(ValueError, match=match_str): | |
223 | 228 | df = GeoDataFrame(geometry=s, crs=4326) |
224 | assert df.crs == self.osgb | |
225 | assert df.geometry.crs == self.osgb | |
226 | assert df.geometry.values.crs == self.osgb | |
227 | with pytest.warns(FutureWarning, match="CRS mismatch"): | |
229 | with pytest.raises(ValueError, match=match_str): | |
228 | 230 | GeoDataFrame(geometry=s, crs=4326) |
229 | with pytest.warns(FutureWarning, match="CRS mismatch"): | |
231 | with pytest.raises(ValueError, match=match_str): | |
230 | 232 | GeoDataFrame({"data": [1, 2], "geometry": s}, crs=4326) |
231 | with pytest.warns(FutureWarning, match="CRS mismatch"): | |
233 | with pytest.raises(ValueError, match=match_str): | |
232 | 234 | GeoDataFrame(df, crs=4326).crs |
233 | 235 | |
234 | 236 | # manually change CRS |
240 | 242 | assert df.geometry.crs == self.wgs |
241 | 243 | assert df.geometry.values.crs == self.wgs |
242 | 244 | |
243 | df = GeoDataFrame(self.geoms, columns=["geom"], crs=27700) | |
244 | assert df.crs == self.osgb | |
245 | df = df.set_geometry("geom") | |
245 | with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"): | |
246 | GeoDataFrame(self.geoms, columns=["geom"], crs=27700) | |
247 | with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"): | |
248 | GeoDataFrame(crs=27700) | |
249 | ||
250 | df = GeoDataFrame(self.geoms, columns=["geom"]) | |
251 | df = df.set_geometry("geom", crs=27700) | |
246 | 252 | assert df.crs == self.osgb |
247 | 253 | assert df.geometry.crs == self.osgb |
248 | 254 | assert df.geometry.values.crs == self.osgb |
254 | 260 | assert df.geometry.crs == self.osgb |
255 | 261 | assert df.geometry.values.crs == self.osgb |
256 | 262 | |
257 | df = GeoDataFrame(crs=27700) | |
258 | df = df.set_geometry(self.geoms) | |
259 | assert df.crs == self.osgb | |
260 | assert df.geometry.crs == self.osgb | |
261 | assert df.geometry.values.crs == self.osgb | |
262 | ||
263 | 263 | # new geometry with set CRS has priority over GDF CRS |
264 | df = GeoDataFrame(crs=27700) | |
264 | df = GeoDataFrame(geometry=self.geoms, crs=27700) | |
265 | 265 | df = df.set_geometry(self.geoms, crs=4326) |
266 | 266 | assert df.crs == self.wgs |
267 | 267 | assert df.geometry.crs == self.wgs |
296 | 296 | |
297 | 297 | # geometry column without geometry |
298 | 298 | df = GeoDataFrame({"geometry": [0, 1]}) |
299 | df.crs = 27700 | |
300 | assert df.crs == self.osgb | |
299 | with pytest.warns( | |
300 | FutureWarning, match="Accessing CRS of a GeoDataFrame without a geometry" | |
301 | ): | |
302 | df.crs = 27700 | |
303 | with pytest.warns( | |
304 | FutureWarning, match="Accessing CRS of a GeoDataFrame without a geometry" | |
305 | ): | |
306 | assert df.crs == self.osgb | |
307 | ||
308 | def test_dataframe_getitem_without_geometry_column(self): | |
309 | df = GeoDataFrame({"col": range(10)}, geometry=self.arr) | |
310 | df["geom2"] = df.geometry.centroid | |
311 | subset = df[["col", "geom2"]] | |
312 | with pytest.warns( | |
313 | FutureWarning, match="Accessing CRS of a GeoDataFrame without a geometry" | |
314 | ): | |
315 | assert subset.crs == self.osgb | |
301 | 316 | |
302 | 317 | def test_dataframe_setitem(self): |
303 | 318 | # new geometry CRS has priority over GDF CRS |
335 | 350 | assert df["geometry"].crs == self.wgs |
336 | 351 | assert df["other_geom"].crs == self.osgb |
337 | 352 | |
353 | def test_dataframe_setitem_without_geometry_column(self): | |
354 | arr = from_shapely(self.geoms) | |
355 | df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326) | |
356 | ||
357 | # create a dataframe without geometry column, but currently has cached _crs | |
358 | with pytest.warns(UserWarning): | |
359 | df["geometry"] = 1 | |
360 | ||
361 | # assigning a list of geometry object will currently use _crs | |
362 | with pytest.warns( | |
363 | FutureWarning, | |
364 | match="Setting geometries to a GeoDataFrame without a geometry", | |
365 | ): | |
366 | df["geometry"] = self.geoms | |
367 | assert df.crs == self.wgs | |
368 | ||
338 | 369 | @pytest.mark.parametrize( |
339 | 370 | "scalar", [None, Point(0, 0), LineString([(0, 0), (1, 1)])] |
340 | 371 | ) |
341 | 372 | def test_scalar(self, scalar): |
342 | with pytest.warns(FutureWarning): | |
373 | df = GeoDataFrame() | |
374 | df["geometry"] = scalar | |
375 | df.crs = 4326 | |
376 | assert df.crs == self.wgs | |
377 | assert df.geometry.crs == self.wgs | |
378 | assert df.geometry.values.crs == self.wgs | |
379 | ||
380 | @pytest.mark.filterwarnings("ignore:Accessing CRS") | |
381 | def test_crs_with_no_geom_fails(self): | |
382 | with pytest.raises(ValueError, match="Assigning CRS to a GeoDataFrame without"): | |
343 | 383 | df = GeoDataFrame() |
344 | 384 | df.crs = 4326 |
345 | df["geometry"] = scalar | |
346 | assert df.crs == self.wgs | |
347 | assert df.geometry.crs == self.wgs | |
348 | assert df.geometry.values.crs == self.wgs | |
349 | 385 | |
350 | 386 | def test_read_file(self): |
351 | 387 | nybb_filename = datasets.get_path("nybb") |
577 | 613 | assert merged.geom.values.crs == self.osgb |
578 | 614 | assert merged.crs == self.osgb |
579 | 615 | |
580 | # CRS should be assigned to geometry | |
581 | def test_deprecation(self): | |
582 | with pytest.warns(FutureWarning): | |
583 | df = GeoDataFrame([], crs=27700) | |
584 | ||
585 | # https://github.com/geopandas/geopandas/issues/1548 | |
586 | # ensure we still have converted the crs value to a CRS object | |
587 | assert isinstance(df.crs, pyproj.CRS) | |
588 | ||
589 | with pytest.warns(FutureWarning): | |
590 | df = GeoDataFrame([]) | |
591 | df.crs = 27700 | |
592 | ||
593 | assert isinstance(df.crs, pyproj.CRS) | |
594 | ||
595 | 616 | # make sure that geometry column from list has CRS (__setitem__) |
596 | 617 | def test_setitem_geometry(self): |
597 | 618 | arr = from_shapely(self.geoms, crs=27700) |
6 | 6 | |
7 | 7 | from pandas.testing import assert_frame_equal |
8 | 8 | import pytest |
9 | ||
10 | from geopandas.testing import assert_geodataframe_equal | |
9 | 11 | |
10 | 12 | |
11 | 13 | @pytest.fixture |
17 | 19 | nybb_polydf = nybb_polydf.set_geometry("myshapes") |
18 | 20 | nybb_polydf["manhattan_bronx"] = 5 |
19 | 21 | nybb_polydf.loc[3:4, "manhattan_bronx"] = 6 |
22 | nybb_polydf["BoroCode"] = nybb_polydf["BoroCode"].astype("int64") | |
20 | 23 | return nybb_polydf |
21 | 24 | |
22 | 25 | |
70 | 73 | assert test.crs is None |
71 | 74 | |
72 | 75 | |
73 | def first_dissolve(nybb_polydf, first): | |
76 | def test_first_dissolve(nybb_polydf, first): | |
74 | 77 | test = nybb_polydf.dissolve("manhattan_bronx") |
75 | 78 | assert_frame_equal(first, test, check_column_type=False) |
76 | 79 | |
296 | 299 | UserWarning, match="dropna kwarg is not supported for pandas < 1.1.0" |
297 | 300 | ): |
298 | 301 | nybb_polydf.dissolve(dropna=False) |
302 | ||
303 | ||
304 | def test_dissolve_multi_agg(nybb_polydf, merged_shapes): | |
305 | ||
306 | merged_shapes[("BoroCode", "min")] = [3, 1] | |
307 | merged_shapes[("BoroCode", "max")] = [5, 2] | |
308 | merged_shapes[("BoroName", "count")] = [3, 2] | |
309 | ||
310 | with pytest.warns(None) as record: | |
311 | test = nybb_polydf.dissolve( | |
312 | by="manhattan_bronx", | |
313 | aggfunc={ | |
314 | "BoroCode": ["min", "max"], | |
315 | "BoroName": "count", | |
316 | }, | |
317 | ) | |
318 | assert_geodataframe_equal(test, merged_shapes) | |
319 | assert len(record) == 0 |
1 | 1 | import numpy as np |
2 | 2 | import pandas as pd |
3 | 3 | import pytest |
4 | from distutils.version import LooseVersion | |
4 | from packaging.version import Version | |
5 | 5 | |
6 | 6 | folium = pytest.importorskip("folium") |
7 | 7 | branca = pytest.importorskip("branca") |
12 | 12 | import matplotlib.colors as colors # noqa |
13 | 13 | from branca.colormap import StepColormap # noqa |
14 | 14 | |
15 | BRANCA_05 = str(branca.__version__) > LooseVersion("0.4.2") | |
15 | BRANCA_05 = Version(branca.__version__) > Version("0.4.2") | |
16 | 16 | |
17 | 17 | |
18 | 18 | class TestExplore: |
61 | 61 | assert "openstreetmap" in m.to_dict()["children"].keys() |
62 | 62 | |
63 | 63 | def test_map_settings_custom(self): |
64 | """Check custom map settins""" | |
64 | """Check custom map settings""" | |
65 | 65 | m = self.nybb.explore( |
66 | 66 | zoom_control=False, |
67 | 67 | width=200, |
251 | 251 | df["categorical"] = pd.Categorical(df["BoroName"]) |
252 | 252 | with pytest.raises(ValueError, match="Cannot specify 'categories'"): |
253 | 253 | df.explore("categorical", categories=["Brooklyn", "Staten Island"]) |
254 | ||
255 | def test_bool(self): | |
256 | df = self.nybb.copy() | |
257 | df["bool"] = [True, False, True, False, True] | |
258 | m = df.explore("bool") | |
259 | out_str = self._fetch_map_string(m) | |
260 | assert '"__folium_color":"#9edae5","bool":true' in out_str | |
261 | assert '"__folium_color":"#1f77b4","bool":false' in out_str | |
254 | 262 | |
255 | 263 | def test_column_values(self): |
256 | 264 | """ |
308 | 316 | m = self.world.explore(column="pop_est", style_kwds=dict(color="black")) |
309 | 317 | assert '"color":"black"' in self._fetch_map_string(m) |
310 | 318 | |
319 | # custom style_function - geopandas/issues/2350 | |
320 | m = self.world.explore( | |
321 | style_kwds={ | |
322 | "style_function": lambda x: { | |
323 | "fillColor": "red" | |
324 | if x["properties"]["gdp_md_est"] < 10**6 | |
325 | else "green", | |
326 | "color": "black" | |
327 | if x["properties"]["gdp_md_est"] < 10**6 | |
328 | else "white", | |
329 | } | |
330 | } | |
331 | ) | |
332 | # two lines with formatting instructions from style_function. | |
333 | # make sure each passes test | |
334 | assert all( | |
335 | [ | |
336 | ('"fillColor":"green"' in t and '"color":"white"' in t) | |
337 | or ('"fillColor":"red"' in t and '"color":"black"' in t) | |
338 | for t in [ | |
339 | "".join(line.split()) | |
340 | for line in m._parent.render().split("\n") | |
341 | if "return" in line and "color" in line | |
342 | ] | |
343 | ] | |
344 | ) | |
345 | ||
346 | # style function has to be callable | |
347 | with pytest.raises(ValueError, match="'style_function' has to be a callable"): | |
348 | self.world.explore(style_kwds={"style_function": "not callable"}) | |
349 | ||
311 | 350 | def test_tooltip(self): |
312 | 351 | """Test tooltip""" |
313 | 352 | # default with no tooltip or popup |
412 | 451 | assert "BoroName" in out_str |
413 | 452 | |
414 | 453 | def test_default_markers(self): |
415 | # check overriden default for points | |
454 | # check overridden default for points | |
416 | 455 | m = self.cities.explore() |
417 | 456 | strings = ['"radius":2', '"fill":true', "CircleMarker(latlng,opts)"] |
418 | 457 | out_str = self._fetch_map_string(m) |
541 | 580 | assert out_str.count("#5ec962ff") == 100 |
542 | 581 | assert out_str.count("#fde725ff") == 100 |
543 | 582 | |
544 | # scale legend accorrdingly | |
583 | # scale legend accordingly | |
545 | 584 | m = self.world.explore( |
546 | 585 | "pop_est", |
547 | 586 | legend=True, |
549 | 588 | ) |
550 | 589 | out_str = self._fetch_map_string(m) |
551 | 590 | assert out_str.count("#440154ff") == 16 |
552 | assert out_str.count("#3b528bff") == 51 | |
553 | assert out_str.count("#21918cff") == 133 | |
554 | assert out_str.count("#5ec962ff") == 282 | |
555 | assert out_str.count("#fde725ff") == 18 | |
591 | assert out_str.count("#3b528bff") == 50 | |
592 | assert out_str.count("#21918cff") == 138 | |
593 | assert out_str.count("#5ec962ff") == 290 | |
594 | assert out_str.count("#fde725ff") == 6 | |
556 | 595 | |
557 | 596 | # discrete cmap |
558 | 597 | m = self.world.explore("pop_est", legend=True, cmap="Pastel2") |
569 | 608 | |
570 | 609 | @pytest.mark.skipif(not BRANCA_05, reason="requires branca >= 0.5.0") |
571 | 610 | def test_colorbar_max_labels(self): |
611 | import re | |
612 | ||
572 | 613 | # linear |
573 | 614 | m = self.world.explore("pop_est", legend_kwds=dict(max_labels=3)) |
574 | 615 | out_str = self._fetch_map_string(m) |
575 | ||
576 | tick_values = [140.0, 465176713.5921569, 930353287.1843138] | |
577 | for tick in tick_values: | |
578 | assert str(tick) in out_str | |
616 | tick_str = re.search(r"tickValues\(\[[\',\,\.,0-9]*\]\)", out_str).group(0) | |
617 | assert ( | |
618 | tick_str.replace(",''", "") | |
619 | == "tickValues([140.0,471386328.07843137,942772516.1568627])" | |
620 | ) | |
579 | 621 | |
580 | 622 | # scheme |
581 | 623 | m = self.world.explore( |
582 | 624 | "pop_est", scheme="headtailbreaks", legend_kwds=dict(max_labels=3) |
583 | 625 | ) |
584 | 626 | out_str = self._fetch_map_string(m) |
585 | ||
586 | assert "tickValues([140,'',182567501.0,'',1330619341.0,''])" in out_str | |
627 | assert "tickValues([140.0,'',184117213.1818182,'',1382066377.0,''])" in out_str | |
587 | 628 | |
588 | 629 | # short cmap |
589 | 630 | m = self.world.explore("pop_est", legend_kwds=dict(max_labels=3), cmap="tab10") |
590 | 631 | out_str = self._fetch_map_string(m) |
591 | 632 | |
592 | tick_values = [140.0, 551721192.4, 1103442244.8] | |
593 | for tick in tick_values: | |
594 | assert str(tick) in out_str | |
633 | tick_str = re.search(r"tickValues\(\[[\',\,\.,0-9]*\]\)", out_str).group(0) | |
634 | assert ( | |
635 | tick_str | |
636 | == "tickValues([140.0,'','','',559086084.0,'','','',1118172028.0,'','',''])" | |
637 | ) | |
595 | 638 | |
596 | 639 | def test_xyzservices_providers(self): |
597 | 640 | xyzservices = pytest.importorskip("xyzservices") |
607 | 650 | 'attribution":"\\u0026copy;\\u003cahref=\\"https://www.openstreetmap.org' |
608 | 651 | in out_str |
609 | 652 | ) |
610 | assert '"maxNativeZoom":19,"maxZoom":19,"minZoom":0' in out_str | |
653 | assert '"maxNativeZoom":20,"maxZoom":20,"minZoom":0' in out_str | |
611 | 654 | |
612 | 655 | def test_xyzservices_query_name(self): |
613 | 656 | pytest.importorskip("xyzservices") |
623 | 666 | 'attribution":"\\u0026copy;\\u003cahref=\\"https://www.openstreetmap.org' |
624 | 667 | in out_str |
625 | 668 | ) |
626 | assert '"maxNativeZoom":19,"maxZoom":19,"minZoom":0' in out_str | |
669 | assert '"maxNativeZoom":20,"maxZoom":20,"minZoom":0' in out_str | |
627 | 670 | |
628 | 671 | def test_linearrings(self): |
629 | 672 | rings = self.nybb.explode(index_parts=True).exterior |
643 | 686 | out_str = self._fetch_map_string(m) |
644 | 687 | |
645 | 688 | strings = [ |
646 | "[140.00,33986655.00]", | |
647 | "(33986655.00,105350020.00]", | |
648 | "(105350020.00,207353391.00]", | |
649 | "(207353391.00,326625791.00]", | |
650 | "(326625791.00,1379302771.00]", | |
689 | "[140.00,21803000.00]", | |
690 | "(21803000.00,66834405.00]", | |
691 | "(66834405.00,163046161.00]", | |
692 | "(163046161.00,328239523.00]", | |
693 | "(328239523.00,1397715000.00]", | |
651 | 694 | "missing", |
652 | 695 | ] |
653 | 696 | for s in strings: |
664 | 707 | out_str = self._fetch_map_string(m) |
665 | 708 | |
666 | 709 | strings = [ |
667 | ">140.00,33986655.00", | |
668 | ">33986655.00,105350020.00", | |
669 | ">105350020.00,207353391.00", | |
670 | ">207353391.00,326625791.00", | |
671 | ">326625791.00,1379302771.00", | |
710 | ">140.00,21803000.00", | |
711 | ">21803000.00,66834405.00", | |
712 | ">66834405.00,163046161.00", | |
713 | ">163046161.00,328239523.00", | |
714 | ">328239523.00,1397715000.00", | |
672 | 715 | "missing", |
673 | 716 | ] |
674 | 717 | for s in strings: |
699 | 742 | out_str = self._fetch_map_string(m) |
700 | 743 | |
701 | 744 | strings = [ |
702 | ">140,33986655", | |
703 | ">33986655,105350020", | |
704 | ">105350020,207353391", | |
705 | ">207353391,326625791", | |
706 | ">326625791,1379302771", | |
745 | ">140,21803000", | |
746 | ">21803000,66834405", | |
747 | ">66834405,163046161", | |
748 | ">163046161,328239523", | |
749 | ">328239523,1397715000", | |
707 | 750 | "missing", |
708 | 751 | ] |
709 | 752 | for s in strings: |
749 | 792 | for s in strings: |
750 | 793 | assert s in out_str |
751 | 794 | |
752 | assert out_str.count("008000ff") == 306 | |
753 | assert out_str.count("ffff00ff") == 187 | |
754 | assert out_str.count("ff0000ff") == 190 | |
795 | assert out_str.count("008000ff") == 304 | |
796 | assert out_str.count("ffff00ff") == 188 | |
797 | assert out_str.count("ff0000ff") == 191 | |
755 | 798 | |
756 | 799 | # Using custom function colormap |
757 | 800 | def my_color_function(field): |
796 | 839 | gdf["centroid"] = gdf.centroid |
797 | 840 | |
798 | 841 | gdf.explore() |
842 | ||
def test_map_kwds(self):
    def assert_map_options_applied():
        rendered = self._fetch_map_string(m)
        assert "zoomControl:false" in rendered
        assert "dragging:false" in rendered
        assert "scrollWheelZoom:false" in rendered

    # Folium Map() keywords and raw Leaflet options may be combined.
    m = self.world.explore(
        zoom_control=False, map_kwds=dict(dragging=False, scrollWheelZoom=False)
    )
    assert_map_options_applied()

    # zoom_control is an explicit keyword; duplicating it inside map_kwds
    # must raise rather than silently prefer one of the two values.
    with pytest.raises(
        ValueError, match="'zoom_control' cannot be specified in 'map_kwds'"
    ):
        self.world.explore(
            map_kwds=dict(dragging=False, scrollWheelZoom=False, zoom_control=False)
        )
230 | 230 | return request.param |
231 | 231 | |
232 | 232 | |
@pytest.fixture
def invalid_scalar(data):
    """
    A scalar value that this ExtensionArray is guaranteed *not* to accept.

    A bare ``object`` instance works for most subclasses; arrays that can
    hold arbitrary items (i.e. object dtype) should use pytest.skip.
    """
    return object()
243 | ||
244 | ||
233 | 245 | # Fixtures defined in pandas/conftest.py that are also needed: defining them |
234 | 246 | # here instead of importing for compatibility |
235 | 247 | |
292 | 304 | class TestInterface(extension_tests.BaseInterfaceTests): |
293 | 305 | def test_array_interface(self, data): |
294 | 306 | # we are overriding this base test because the creation of `expected` |
295 | # potentionally doesn't work for shapely geometries | |
307 | # potentially doesn't work for shapely geometries | |
296 | 308 | # TODO can be removed with Shapely 2.0 |
297 | 309 | result = np.array(data) |
298 | 310 | assert result[0] == data[0] |
1 | 1 | import os |
2 | 2 | import shutil |
3 | 3 | import tempfile |
4 | from distutils.version import LooseVersion | |
4 | from packaging.version import Version | |
5 | 5 | |
6 | 6 | import numpy as np |
7 | 7 | import pandas as pd |
8 | 8 | |
9 | import pyproj | |
10 | 9 | from pyproj import CRS |
11 | 10 | from pyproj.exceptions import CRSError |
12 | 11 | from shapely.geometry import Point, Polygon |
23 | 22 | import pytest |
24 | 23 | |
25 | 24 | |
26 | PYPROJ_LT_3 = LooseVersion(pyproj.__version__) < LooseVersion("3") | |
27 | 25 | TEST_NEAREST = compat.PYGEOS_GE_010 and compat.USE_PYGEOS |
28 | pandas_133 = pd.__version__ == LooseVersion("1.3.3") | |
26 | pandas_133 = Version(pd.__version__) == Version("1.3.3") | |
29 | 27 | |
30 | 28 | |
31 | 29 | @pytest.fixture |
335 | 333 | assert isinstance(result, GeoDataFrame) |
336 | 334 | assert isinstance(result.index, pd.DatetimeIndex) |
337 | 335 | |
def test_set_geometry_np_int(self):
    # numpy integer column labels must work like plain Python ints.
    self.df.loc[:, 0] = self.df.geometry
    result = self.df.set_geometry(np.int64(0))
    assert result.geometry.name == 0
340 | ||
def test_get_geometry_invalid(self):
    # A frame that holds geometry data but never had an active column set.
    frame = GeoDataFrame()
    frame["geom"] = self.df.geometry
    missing_msg = "is not present. "
    unset_msg = "active geometry column to use has not been set. "

    with pytest.raises(AttributeError, match=missing_msg):
        frame.geometry

    # The active column was selected away while another geometry column stays.
    dropped = self.df.copy()
    dropped["geom2"] = dropped.geometry
    dropped = dropped[["BoroCode", "BoroName", "geom2"]]
    with pytest.raises(AttributeError, match=unset_msg):
        dropped.geometry

    # Error message should hint at candidate geometry columns when they exist.
    with pytest.raises(
        AttributeError, match="There are columns with geometry data type"
    ):
        dropped.geometry

    with pytest.raises(
        AttributeError, match="There are no existing columns with geometry data type"
    ):
        GeoDataFrame().geometry
362 | ||
338 | 363 | def test_align(self): |
339 | 364 | df = self.df2 |
340 | 365 | |
505 | 530 | assert type(df2) is GeoDataFrame |
506 | 531 | assert self.df.crs == df2.crs |
507 | 532 | |
def test_to_file_crs(self):
    """Writing to file must honour an explicitly requested CRS."""
    path = os.path.join(self.tempdir, "crs.shp")

    # Round-trip without overriding keeps the frame's own CRS.
    self.df.to_file(path)
    assert GeoDataFrame.from_file(path).crs == self.df.crs

    # An explicit crs argument overrides the frame's CRS on write.
    self.df.to_file(path, crs=3857)
    assert GeoDataFrame.from_file(path).crs == "epsg:3857"

    # The crs argument also applies when the frame has no CRS at all.
    without_crs = self.df.copy()
    without_crs.crs = None
    without_crs.to_file(path, crs=2263)
    assert GeoDataFrame.from_file(path).crs == "epsg:2263"
530 | ||
def test_to_file_with_duplicate_columns(self):
    # Duplicated column names cannot be represented on write and must raise.
    duplicated = GeoDataFrame(
        data=[[1, 2, 3]], columns=["a", "b", "a"], geometry=[Point(1, 1)]
    )
    expected_msg = "GeoDataFrame cannot contain duplicated column names."
    with pytest.raises(ValueError, match=expected_msg):
        duplicated.to_file(os.path.join(self.tempdir, "crs.shp"))
540 | ||
541 | 533 | def test_bool_index(self): |
542 | 534 | # Find boros with 'B' in their name |
543 | 535 | df = self.df[self.df["BoroName"].str.contains("B")] |
593 | 585 | p3 = Point(3, 3) |
594 | 586 | f3 = { |
595 | 587 | "type": "Feature", |
596 | "properties": {"a": 2}, | |
588 | "properties": None, | |
597 | 589 | "geometry": p3.__geo_interface__, |
598 | 590 | } |
599 | 591 | |
601 | 593 | |
602 | 594 | result = df[["a", "b"]] |
603 | 595 | expected = pd.DataFrame.from_dict( |
604 | [{"a": 0, "b": np.nan}, {"a": np.nan, "b": 1}, {"a": 2, "b": np.nan}] | |
596 | [{"a": 0, "b": np.nan}, {"a": np.nan, "b": 1}, {"a": np.nan, "b": np.nan}] | |
605 | 597 | ) |
606 | 598 | assert_frame_equal(expected, result) |
599 | ||
def test_from_features_empty_properties(self):
    """
    Features with ``"properties": {}`` and ``"properties": null`` must
    produce identical GeoDataFrames — GeoJSON allows both spellings for
    "no properties".
    """
    # Shared polygon ring; only the "properties" member varies per case.
    coordinates = [
        [
            [11.3456529378891, 46.49461446367692],
            [11.345674395561216, 46.494097442978195],
            [11.346918940544128, 46.49385370294394],
            [11.347616314888, 46.4938352377453],
            [11.347514390945435, 46.49466985846028],
            [11.3456529378891, 46.49461446367692],
        ]
    ]

    def feature_collection(properties):
        # Build the FeatureCollection structure once instead of keeping two
        # near-identical multi-line GeoJSON string literals in sync.
        return {
            "type": "FeatureCollection",
            "features": [
                {
                    "type": "Feature",
                    "properties": properties,
                    "geometry": {"type": "Polygon", "coordinates": coordinates},
                }
            ],
        }

    # Round-trip through JSON text so the test still exercises exactly what
    # a GeoJSON parser would hand to from_features (null -> None).
    gdf_empty_props = GeoDataFrame.from_features(
        json.loads(json.dumps(feature_collection({})))
    )
    gdf_null_props = GeoDataFrame.from_features(
        json.loads(json.dumps(feature_collection(None)))
    )

    assert_frame_equal(gdf_empty_props, gdf_null_props)
607 | 692 | |
608 | 693 | def test_from_features_geom_interface_feature(self): |
609 | 694 | class Placemark(object): |
633 | 718 | geometry = [Point(xy) for xy in zip(df["lon"], df["lat"])] |
634 | 719 | gdf = GeoDataFrame(df, geometry=geometry) |
635 | 720 | # from_features returns sorted columns |
636 | expected = gdf[["geometry", "lat", "lon", "name"]] | |
721 | expected = gdf[["geometry", "name", "lat", "lon"]] | |
637 | 722 | |
638 | 723 | # test FeatureCollection |
639 | 724 | res = GeoDataFrame.from_features(gdf.__geo_interface__) |
760 | 845 | assert self.df.crs == unpickled.crs |
761 | 846 | |
def test_estimate_utm_crs(self):
    if compat.PYPROJ_LT_3:
        # Feature needs pyproj 3+: the call must fail loudly, not guess.
        with pytest.raises(RuntimeError, match=r"pyproj 3\+ required"):
            self.df.estimate_utm_crs()
        return
    assert self.df.estimate_utm_crs() == CRS("EPSG:32618")
    if compat.PYPROJ_GE_32:  # result is unstable in older pyproj
        assert self.df.estimate_utm_crs("NAD83") == CRS("EPSG:26918")
769 | 855 | |
770 | 856 | def test_to_wkb(self): |
771 | 857 | wkbs0 = [ |
6 | 6 | from shapely.geometry import LinearRing, LineString, MultiPoint, Point, Polygon |
7 | 7 | from shapely.geometry.collection import GeometryCollection |
8 | 8 | from shapely.ops import unary_union |
9 | from shapely import wkt | |
9 | 10 | |
10 | 11 | from geopandas import GeoDataFrame, GeoSeries |
11 | 12 | from geopandas.base import GeoPandasBase |
71 | 72 | self.landmarks = GeoSeries([self.esb, self.sol], crs="epsg:4326") |
72 | 73 | self.pt2d = Point(-73.9847, 40.7484) |
73 | 74 | self.landmarks_mixed = GeoSeries([self.esb, self.sol, self.pt2d], crs=4326) |
75 | self.pt_empty = wkt.loads("POINT EMPTY") | |
76 | self.landmarks_mixed_empty = GeoSeries( | |
77 | [self.esb, self.sol, self.pt2d, self.pt_empty], crs=4326 | |
78 | ) | |
74 | 79 | self.l1 = LineString([(0, 0), (0, 1), (1, 1)]) |
75 | 80 | self.l2 = LineString([(0, 0), (1, 0), (1, 1), (0, 1)]) |
76 | 81 | self.g5 = GeoSeries([self.l1, self.l2]) |
79 | 84 | self.g8 = GeoSeries([self.t1, self.t5]) |
80 | 85 | self.empty = GeoSeries([]) |
81 | 86 | self.all_none = GeoSeries([None, None]) |
87 | self.all_geometry_collection_empty = GeoSeries( | |
88 | [GeometryCollection([]), GeometryCollection([])] | |
89 | ) | |
82 | 90 | self.empty_poly = Polygon() |
83 | 91 | self.g9 = GeoSeries(self.g0, index=range(1, 8)) |
92 | self.g10 = GeoSeries([self.t1, self.t4]) | |
84 | 93 | |
85 | 94 | # Crossed lines |
86 | 95 | self.l3 = LineString([(0, 0), (1, 1)]) |
247 | 256 | with pytest.warns(UserWarning, match="The indices .+ different"): |
248 | 257 | assert len(self.g0.intersection(self.g9, align=True) == 8) |
249 | 258 | assert len(self.g0.intersection(self.g9, align=False) == 7) |
259 | ||
def test_clip_by_rect(self):
    # Clipping by the square's own bounds behaves like an intersection.
    inside_bounds = self.sq.bounds
    self._test_binary_topological("clip_by_rect", self.g1, self.g10, *inside_bounds)

    # self.g1 and self.t3.bounds do not intersect, so clipping yields an
    # empty GeometryCollection for every element.
    disjoint_bounds = self.t3.bounds
    self._test_binary_topological(
        "clip_by_rect", self.all_geometry_collection_empty, self.g1, *disjoint_bounds
    )
250 | 268 | |
251 | 269 | def test_union_series(self): |
252 | 270 | self._test_binary_topological("union", self.sq, self.g1, self.g2) |
471 | 489 | ) |
472 | 490 | assert_array_dtype_equal(expected, self.na_none.distance(self.p0)) |
473 | 491 | |
474 | expected = Series(np.array([np.sqrt(4 ** 2 + 4 ** 2), np.nan]), self.g6.index) | |
492 | expected = Series(np.array([np.sqrt(4**2 + 4**2), np.nan]), self.g6.index) | |
475 | 493 | assert_array_dtype_equal(expected, self.g6.distance(self.na_none)) |
476 | 494 | |
477 | 495 | expected = Series(np.array([np.nan, 0, 0, 0, 0, 0, np.nan, np.nan]), range(8)) |
621 | 639 | # mixed dimensions |
622 | 640 | expected_z = [30.3244, 31.2344, np.nan] |
623 | 641 | assert_array_dtype_equal(expected_z, self.landmarks_mixed.geometry.z) |
642 | ||
def test_xyz_points_empty(self):
    # An empty point contributes NaN for every coordinate accessor.
    geom = self.landmarks_mixed_empty.geometry
    expected = {
        "x": [-73.9847, -74.0446, -73.9847, np.nan],
        "y": [40.7484, 40.6893, 40.7484, np.nan],
        "z": [30.3244, 31.2344, np.nan, np.nan],
    }
    assert_array_dtype_equal(expected["x"], geom.x)
    assert_array_dtype_equal(expected["y"], geom.y)
    assert_array_dtype_equal(expected["z"], geom.z)
624 | 651 | |
625 | 652 | def test_xyz_polygons(self): |
626 | 653 | # accessing x attribute in polygon geoseries should raise an error |
1089 | 1116 | test_df = df.explode(ignore_index=True, index_parts=True) |
1090 | 1117 | assert_frame_equal(test_df, expected_df) |
1091 | 1118 | |
def test_explode_order(self):
    # Exploded rows must keep the original (unsorted) index label order.
    source = GeoDataFrame(
        {"vals": [1, 2, 3]},
        geometry=[MultiPoint([(x, x), (x, 0)]) for x in range(3)],
        index=[2, 9, 7],
    )
    result = source.explode(index_parts=True)

    labels = [2, 2, 9, 9, 7, 7]
    parts = [0, 1, 0, 1, 0, 1]
    expected_index = MultiIndex.from_arrays([labels, parts])
    exploded_points = [
        Point(0, 0),
        Point(0, 0),
        Point(1, 1),
        Point(1, 0),
        Point(2, 2),
        Point(2, 0),
    ]
    expected = GeoDataFrame(
        {"vals": [1, 1, 2, 2, 3, 3]},
        geometry=GeoSeries(exploded_points, index=expected_index),
        index=expected_index,
    )
    assert_geodataframe_equal(result, expected)
1147 | ||
def test_explode_order_no_multi(self):
    # Singleton geometries get part number 0 but keep their labels in order.
    points = [Point(0, x) for x in range(3)]
    source = GeoDataFrame({"vals": [1, 2, 3]}, geometry=points, index=[2, 9, 7])
    result = source.explode(index_parts=True)

    expected_index = MultiIndex.from_arrays([[2, 9, 7], [0, 0, 0]])
    expected = GeoDataFrame(
        {"vals": [1, 2, 3]}, geometry=points, index=expected_index
    )
    assert_geodataframe_equal(result, expected)
1165 | ||
def test_explode_order_mixed(self):
    # A mix of multi- and single-part geometries explodes in row order.
    geoms = [MultiPoint([(x, x), (x, 0)]) for x in range(2)] + [Point(0, 10)]
    source = GeoDataFrame({"vals": [1, 2, 3]}, geometry=geoms, index=[2, 9, 7])
    result = source.explode(index_parts=True)

    expected_index = MultiIndex.from_arrays([[2, 2, 9, 9, 7], [0, 1, 0, 1, 0]])
    exploded_points = [
        Point(0, 0),
        Point(0, 0),
        Point(1, 1),
        Point(1, 0),
        Point(0, 10),
    ]
    expected = GeoDataFrame(
        {"vals": [1, 1, 2, 2, 3]},
        geometry=GeoSeries(exploded_points, index=expected_index),
        index=expected_index,
    )
    assert_geodataframe_equal(result, expected)
1193 | ||
def test_explode_duplicated_index(self):
    # Duplicate index labels survive exploding; parts are numbered per row.
    source = GeoDataFrame(
        {"vals": [1, 2, 3]},
        geometry=[MultiPoint([(x, x), (x, 0)]) for x in range(3)],
        index=[1, 1, 2],
    )
    result = source.explode(index_parts=True)

    expected_index = MultiIndex.from_arrays(
        [[1, 1, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]
    )
    exploded_points = [
        Point(0, 0),
        Point(0, 0),
        Point(1, 1),
        Point(1, 0),
        Point(2, 2),
        Point(2, 0),
    ]
    expected = GeoDataFrame(
        {"vals": [1, 1, 2, 2, 3, 3]},
        geometry=GeoSeries(exploded_points, index=expected_index),
        index=expected_index,
    )
    assert_geodataframe_equal(result, expected)
1221 | ||
1092 | 1222 | # |
1093 | 1223 | # Test '&', '|', '^', and '-' |
1094 | 1224 | # |
def test_intersection_operator(self):
    # "&" mirrors intersection() but is deprecated, so it must warn for
    # both GeoSeries and GeoDataFrame operands.
    for left in (self.g1, self.gdf1):
        with pytest.warns(FutureWarning):
            self._test_binary_operator("__and__", self.t1, left, self.g2)
1100 | 1230 | |
def test_union_operator(self):
    # "|" mirrors union() but is deprecated, so it must warn for both
    # GeoSeries and GeoDataFrame operands.
    for left in (self.g1, self.gdf1):
        with pytest.warns(FutureWarning):
            self._test_binary_operator("__or__", self.sq, left, self.g2)
1106 | 1236 | |
def test_union_operator_polygon(self):
    # "|" with a scalar polygon operand warns the same way.
    for left in (self.g1, self.gdf1):
        with pytest.warns(FutureWarning):
            self._test_binary_operator("__or__", self.sq, left, self.t2)
1112 | 1242 | |
def test_symmetric_difference_operator(self):
    # "^" mirrors symmetric_difference() but is deprecated, so it must warn.
    for left in (self.g3, self.gdf3):
        with pytest.warns(FutureWarning):
            self._test_binary_operator("__xor__", self.sq, left, self.g4)
1118 | 1248 | |
def test_difference_series2(self):
    # "-" mirrors difference() but is deprecated, so it must warn.
    expected = GeoSeries([GeometryCollection(), self.t2])
    for left in (self.g1, self.gdf1):
        with pytest.warns(FutureWarning):
            self._test_binary_operator("__sub__", expected, left, self.g2)
1125 | 1255 | |
def test_difference_poly2(self):
    # "-" with a scalar polygon operand warns the same way.
    expected = GeoSeries([self.t1, self.t1])
    for left in (self.g1, self.gdf1):
        with pytest.warns(FutureWarning):
            self._test_binary_operator("__sub__", expected, left, self.t2)
6 | 6 | import numpy as np |
7 | 7 | from numpy.testing import assert_array_equal |
8 | 8 | import pandas as pd |
9 | from pandas.util.testing import assert_index_equal | |
9 | from pandas.testing import assert_index_equal | |
10 | 10 | |
11 | 11 | from pyproj import CRS |
12 | 12 | from shapely.geometry import ( |
20 | 20 | from shapely.geometry.base import BaseGeometry |
21 | 21 | |
22 | 22 | from geopandas import GeoSeries, GeoDataFrame, read_file, datasets, clip |
23 | from geopandas._compat import PYPROJ_LT_3, ignore_shapely2_warnings | |
23 | from geopandas._compat import ignore_shapely2_warnings | |
24 | 24 | from geopandas.array import GeometryArray, GeometryDtype |
25 | 25 | from geopandas.testing import assert_geoseries_equal |
26 | 26 | |
27 | 27 | from geopandas.tests.util import geom_equals |
28 | 28 | from pandas.testing import assert_series_equal |
29 | 29 | import pytest |
30 | ||
31 | import geopandas._compat as compat | |
30 | 32 | |
31 | 33 | |
32 | 34 | class TestSeries: |
204 | 206 | self.landmarks.to_crs(crs=None, epsg=None) |
205 | 207 | |
206 | 208 | def test_estimate_utm_crs__geographic(self): |
207 | if PYPROJ_LT_3: | |
209 | if compat.PYPROJ_LT_3: | |
208 | 210 | with pytest.raises(RuntimeError, match=r"pyproj 3\+ required"): |
209 | 211 | self.landmarks.estimate_utm_crs() |
210 | 212 | else: |
211 | 213 | assert self.landmarks.estimate_utm_crs() == CRS("EPSG:32618") |
212 | assert self.landmarks.estimate_utm_crs("NAD83") == CRS("EPSG:26918") | |
213 | ||
214 | @pytest.mark.skipif(PYPROJ_LT_3, reason="requires pyproj 3 or higher") | |
214 | if compat.PYPROJ_GE_32: # result is unstable in older pyproj | |
215 | assert self.landmarks.estimate_utm_crs("NAD83") == CRS("EPSG:26918") | |
216 | ||
@pytest.mark.skipif(compat.PYPROJ_LT_3, reason="requires pyproj 3 or higher")
def test_estimate_utm_crs__projected(self):
    # Estimation works from an already-projected CRS too.
    projected = self.landmarks.to_crs("EPSG:3857")
    assert projected.estimate_utm_crs() == CRS("EPSG:32618")
219 | 222 | |
@pytest.mark.skipif(compat.PYPROJ_LT_3, reason="requires pyproj 3 or higher")
def test_estimate_utm_crs__out_of_bounds(self):
    # Polar geometry falls outside every UTM zone.
    polar = GeoSeries([Polygon([(0, 90), (1, 90), (2, 90)])], crs="EPSG:4326")
    with pytest.raises(RuntimeError, match="Unable to determine UTM CRS"):
        polar.estimate_utm_crs()
226 | 229 | |
@pytest.mark.skipif(compat.PYPROJ_LT_3, reason="requires pyproj 3 or higher")
def test_estimate_utm_crs__missing_crs(self):
    # Without a source CRS there is nothing to estimate from.
    no_crs = GeoSeries([Polygon([(0, 90), (1, 90), (2, 90)])])
    with pytest.raises(RuntimeError, match="crs must be set"):
        no_crs.estimate_utm_crs()
378 | 381 | assert_geoseries_equal(expected, GeoSeries.from_xy(x, y, z)) |
379 | 382 | |
380 | 383 | |
381 | def test_missing_values_empty_warning(): | |
382 | s = GeoSeries([Point(1, 1), None, np.nan, BaseGeometry(), Polygon()]) | |
383 | with pytest.warns(UserWarning): | |
384 | s.isna() | |
385 | ||
386 | with pytest.warns(UserWarning): | |
387 | s.notna() | |
388 | ||
389 | ||
390 | 384 | @pytest.mark.filterwarnings("ignore::UserWarning") |
391 | 385 | def test_missing_values(): |
392 | 386 | s = GeoSeries([Point(1, 1), None, np.nan, BaseGeometry(), Polygon()]) |
411 | 405 | |
412 | 406 | |
def test_isna_empty_geoseries():
    # ensure that isna() result for empty GeoSeries has the correct bool dtype
    result = GeoSeries([]).isna()
    assert_series_equal(result, pd.Series([], dtype="bool"))
469 | 463 | for x in gs: |
470 | 464 | assert x.equals(g) |
471 | 465 | |
472 | def test_no_geometries_fallback(self): | |
473 | with pytest.warns(FutureWarning): | |
474 | s = GeoSeries([True, False, True]) | |
475 | assert not isinstance(s, GeoSeries) | |
476 | assert type(s) == pd.Series | |
477 | ||
478 | with pytest.warns(FutureWarning): | |
479 | s = GeoSeries(["a", "b", "c"]) | |
480 | assert not isinstance(s, GeoSeries) | |
481 | assert type(s) == pd.Series | |
482 | ||
483 | with pytest.warns(FutureWarning): | |
484 | s = GeoSeries([[1, 2], [3, 4]]) | |
485 | assert not isinstance(s, GeoSeries) | |
486 | assert type(s) == pd.Series | |
def test_non_geometry_raises(self):
    # Non-geometry payloads are rejected outright (no pd.Series fallback).
    bad_inputs = (
        [True, False, True],
        ["a", "b", "c"],
        [[1, 2], [3, 4]],
    )
    for data in bad_inputs:
        with pytest.raises(TypeError, match="Non geometry data passed to GeoSeries"):
            GeoSeries(data)
487 | 475 | |
488 | 476 | def test_empty(self): |
489 | 477 | s = GeoSeries([]) |
499 | 487 | def test_empty_array(self): |
500 | 488 | # with empty data that have an explicit dtype, we use the fallback or |
501 | 489 | # not depending on the dtype |
502 | arr = np.array([], dtype="bool") | |
503 | 490 | |
504 | 491 | # dtypes that can never hold geometry-like data |
505 | 492 | for arr in [ |
509 | 496 | # this gets converted to object dtype by pandas |
510 | 497 | # np.array([], dtype="str"), |
511 | 498 | ]: |
512 | with pytest.warns(FutureWarning): | |
513 | s = GeoSeries(arr) | |
514 | assert not isinstance(s, GeoSeries) | |
515 | assert type(s) == pd.Series | |
499 | with pytest.raises( | |
500 | TypeError, match="Non geometry data passed to GeoSeries" | |
501 | ): | |
502 | GeoSeries(arr) | |
516 | 503 | |
517 | 504 | # dtypes that can potentially hold geometry-like data (object) or |
518 | 505 | # can come from empty data (float64) |
543 | 530 | assert s.index is g.index |
544 | 531 | |
545 | 532 | # GH 1216 |
546 | def test_expanddim(self): | |
@pytest.mark.parametrize("name", [None, "geometry", "Points"])
@pytest.mark.parametrize("crs", [None, "epsg:4326"])
def test_reset_index(self, name, crs):
    # reset_index on an exploded GeoSeries must give a GeoDataFrame that
    # keeps the series' name and CRS.
    series = GeoSeries(
        [MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)])],
        name=name,
        crs=crs,
    ).explode(index_parts=True)
    frame = series.reset_index()
    assert type(frame) == GeoDataFrame
    # name None -> 0, otherwise name preserved
    assert frame.geometry.name == (name if name is not None else 0)
    assert frame.crs == series.crs
547 | ||
@pytest.mark.parametrize("name", [None, "geometry", "Points"])
@pytest.mark.parametrize("crs", [None, "epsg:4326"])
def test_to_frame(self, name, crs):
    """to_frame must return a GeoDataFrame preserving name and CRS."""
    s = GeoSeries([Point(0, 0), Point(1, 1)], name=name, crs=crs)
    df = s.to_frame()
    assert type(df) == GeoDataFrame
    # name None -> 0, otherwise name preserved
    expected_name = name if name is not None else 0
    assert df.geometry.name == expected_name
    assert df._geometry_column_name == expected_name
    assert df.crs == s.crs

    # if name is provided to to_frame, it should override
    df2 = s.to_frame(name="geom")
    # Fix copy-paste bug: the original asserted type(df) here, re-checking
    # the first frame and leaving df2's type untested.
    assert type(df2) == GeoDataFrame
    assert df2.geometry.name == "geom"
    assert df2.crs == s.crs
553 | 565 | |
554 | 566 | def test_explode_without_multiindex(self): |
555 | 567 | s = GeoSeries( |
565 | 577 | ) |
566 | 578 | s = s.explode(ignore_index=True) |
567 | 579 | expected_index = pd.Index(range(len(s))) |
568 | print(expected_index) | |
569 | 580 | assert_index_equal(s.index, expected_index) |
570 | 581 | |
571 | 582 | # index_parts is ignored if ignore_index=True |
0 | 0 | import pandas as pd |
1 | 1 | import pytest |
2 | 2 | from geopandas.testing import assert_geodataframe_equal |
3 | from pandas.testing import assert_index_equal | |
3 | 4 | |
4 | 5 | from shapely.geometry import Point |
5 | 6 | |
6 | 7 | from geopandas import GeoDataFrame, GeoSeries |
8 | from geopandas import _compat as compat | |
7 | 9 | |
8 | 10 | |
9 | 11 | class TestMerging: |
96 | 98 | res3 = pd.concat([df2.set_crs("epsg:4326"), self.gdf], axis=1) |
97 | 99 | # check metadata comes from first df |
98 | 100 | self._check_metadata(res3, geometry_column_name="geom", crs="epsg:4326") |
101 | ||
@pytest.mark.xfail(
    not compat.PANDAS_GE_11,
    reason="pandas <=1.0 hard codes concat([GeoSeries, GeoSeries]) -> "
    "DataFrame or Union[DataFrame, SparseDataFrame] in 0.25",
)
@pytest.mark.filterwarnings("ignore:Accessing CRS")
def test_concat_axis1_geoseries(self):
    other = GeoSeries([Point(i, i) for i in range(3, 6)], crs="epsg:4326")

    # Note this is not consistent with concat([gdf, gdf], axis=1) where the
    # left metadata is set on the result. This is deliberate for now.
    unnamed = pd.concat([other, self.gseries], axis=1)
    assert type(unnamed) is GeoDataFrame
    self._check_metadata(unnamed, geometry_column_name=None, crs=None)
    assert_index_equal(pd.Index([0, 1]), unnamed.columns)

    # A named series contributes its name as the column label.
    other.name = "foo"
    named = pd.concat([other, self.gseries], axis=1)
    assert type(named) is GeoDataFrame
    self._check_metadata(named, geometry_column_name=None, crs=None)
    assert_index_equal(pd.Index(["foo", 0]), named.columns)
0 | import pandas as pd | |
1 | import pyproj | |
2 | import pytest | |
3 | import geopandas._compat as compat | |
4 | ||
5 | from shapely.geometry import Point | |
6 | import numpy as np | |
7 | ||
8 | from geopandas import GeoDataFrame, GeoSeries | |
9 | ||
10 | ||
11 | crs_osgb = pyproj.CRS(27700) | |
12 | crs_wgs = pyproj.CRS(4326) | |
13 | ||
14 | ||
15 | N = 10 | |
16 | ||
17 | ||
@pytest.fixture(params=["geometry", "point"])
def df(request):
    """GeoDataFrame whose active geometry column name is the fixture param."""
    geo_name = request.param

    records = [
        {"value1": i + i, "value2": i * i, geo_name: Point(i, i)}
        for i in range(N)
    ]
    frame = GeoDataFrame(records, crs=crs_wgs, geometry=geo_name)
    # want geometry2 to be a GeoSeries not Series, test behaviour of non geom col
    frame["geometry2"] = frame[geo_name].set_crs(crs_osgb, allow_override=True)
    return frame
37 | ||
38 | ||
@pytest.fixture
def df2():
    """For constructor_sliced tests"""
    points = [Point(x, x) for x in range(3)]
    return GeoDataFrame(
        {
            "geometry": GeoSeries(points),
            "geometry2": GeoSeries(points),
            "geometry3": GeoSeries(points),
            "value": [1, 2, 1],
            "value_nan": np.nan,
        }
    )
51 | ||
52 | ||
def _check_metadata_gdf(gdf, geo_name="geometry", crs=crs_wgs):
    # Verify GeoDataFrame metadata: both the private tracker and the public
    # accessor must agree on the active geometry column, and CRS must match.
    assert gdf._geometry_column_name == geo_name
    assert gdf.geometry.name == geo_name
    assert gdf.crs == crs
57 | ||
58 | ||
def _check_metadata_gs(gs, name="geometry", crs=crs_wgs):
    # Verify GeoSeries metadata: series name and coordinate reference system.
    assert gs.name == name
    assert gs.crs == crs
62 | ||
63 | ||
def assert_object(
    result, expected_type, geo_name="geometry", crs=crs_wgs, check_none_name=False
):
    """
    Helper to keep the tests readable: assert ``result`` is exactly
    ``expected_type``. For GeoSeries/GeoDataFrame results also check the
    geometry name and CRS. Passing ``geo_name=None`` means we expect a
    GeoDataFrame whose geometry column is invalid / unset — never desirable,
    but a reality of this first stage of implementation.
    """
    assert type(result) is expected_type

    if expected_type == GeoSeries:
        _check_metadata_gs(result, name=geo_name, crs=crs)
        return
    if expected_type != GeoDataFrame:
        return

    if geo_name is not None:
        _check_metadata_gdf(result, geo_name=geo_name, crs=crs)
        return

    if check_none_name:  # TODO this is awkward
        assert result._geometry_column_name is None

    # Accessing .geometry must fail; the message depends on whether the
    # active column is unset or merely absent from this frame.
    if result._geometry_column_name is None:
        msg = (
            "You are calling a geospatial method on the GeoDataFrame, "
            "but the active"
        )
    else:
        msg = (
            "You are calling a geospatial method on the GeoDataFrame, but "
            r"the active geometry column \("
            rf"'{result._geometry_column_name}'\) is not present"
        )
    with pytest.raises(AttributeError, match=msg):
        result.geometry.name  # be explicit that geometry is invalid here
98 | ||
99 | ||
def test_getitem(df):
    """``[]`` selection keeps/drops the Geo* subclass depending on columns."""
    active = df.geometry.name
    # frame results
    assert_object(df[["value1", "value2"]], pd.DataFrame)
    assert_object(df[[active, "geometry2"]], GeoDataFrame, active)
    assert_object(df[[active]], GeoDataFrame, active)
    assert_object(df[["geometry2", "value1"]], GeoDataFrame, None)
    assert_object(df[["geometry2"]], GeoDataFrame, None)
    assert_object(df[["value1"]], pd.DataFrame)
    # series results
    assert_object(df[active], GeoSeries, active)
    assert_object(df["geometry2"], GeoSeries, "geometry2", crs=crs_osgb)
    assert_object(df["value1"], pd.Series)
112 | ||
113 | ||
def test_loc(df):
    """``.loc`` column selection mirrors the ``[]`` type-preservation rules."""
    active = df.geometry.name
    # frame results
    assert_object(df.loc[:, ["value1", "value2"]], pd.DataFrame)
    assert_object(df.loc[:, [active, "geometry2"]], GeoDataFrame, active)
    assert_object(df.loc[:, [active]], GeoDataFrame, active)
    assert_object(df.loc[:, ["geometry2", "value1"]], GeoDataFrame, None)
    assert_object(df.loc[:, ["geometry2"]], GeoDataFrame, None)
    assert_object(df.loc[:, ["value1"]], pd.DataFrame)
    # series results
    assert_object(df.loc[:, active], GeoSeries, active)
    assert_object(df.loc[:, "geometry2"], GeoSeries, "geometry2", crs=crs_osgb)
    assert_object(df.loc[:, "value1"], pd.Series)
126 | ||
127 | ||
def test_iloc(df):
    """``.iloc`` column selection mirrors the ``[]`` type-preservation rules."""
    active = df.geometry.name
    # frame results (columns 2/3 are the geometry columns in the fixture)
    assert_object(df.iloc[:, 0:2], pd.DataFrame)
    assert_object(df.iloc[:, 2:4], GeoDataFrame, active)
    assert_object(df.iloc[:, [2]], GeoDataFrame, active)
    assert_object(df.iloc[:, [3, 0]], GeoDataFrame, None)
    assert_object(df.iloc[:, [3]], GeoDataFrame, None)
    assert_object(df.iloc[:, [0]], pd.DataFrame)
    # series results
    assert_object(df.iloc[:, 2], GeoSeries, active)
    assert_object(df.iloc[:, 3], GeoSeries, "geometry2", crs=crs_osgb)
    assert_object(df.iloc[:, 0], pd.Series)
140 | ||
141 | ||
def test_squeeze(df):
    """Squeezing a single-geometry-column frame yields a GeoSeries."""
    active = df.geometry.name
    assert_object(df[[active]].squeeze(), GeoSeries, active)
    assert_object(df[["geometry2"]].squeeze(), GeoSeries, "geometry2", crs=crs_osgb)
146 | ||
147 | ||
def test_to_frame(df):
    """``Series.to_frame`` upgrades geometry series to GeoDataFrame."""
    active = df.geometry.name
    # active geometry column keeps its own CRS
    assert_object(df[active].to_frame(), GeoDataFrame, active, crs=df[active].crs)
    # secondary geometry column keeps its own CRS too
    assert_object(df["geometry2"].to_frame(), GeoDataFrame, "geometry2", crs=crs_osgb)
    # non-geometry column stays a plain DataFrame
    assert_object(df["value1"].to_frame(), pd.DataFrame)
158 | ||
159 | ||
def test_reindex(df):
    """``reindex`` follows the same column-based type rules as selection."""
    active = df.geometry.name

    # column reindexing
    assert_object(df.reindex(columns=["value1", "value2"]), pd.DataFrame)
    assert_object(df.reindex(columns=[active, "geometry2"]), GeoDataFrame, active)
    assert_object(df.reindex(columns=[active]), GeoDataFrame, active)
    assert_object(df.reindex(columns=["new_col", active]), GeoDataFrame, active)
    assert_object(df.reindex(columns=["geometry2", "value1"]), GeoDataFrame, None)
    assert_object(df.reindex(columns=["geometry2"]), GeoDataFrame, None)
    assert_object(df.reindex(columns=["value1"]), pd.DataFrame)

    # reindexing the rows always preserves the GeoDataFrame
    assert_object(df.reindex(index=[0, 1, 20]), GeoDataFrame, active)

    # reindexing both rows and columns
    assert_object(
        df.reindex(index=[0, 1, 20], columns=[active]), GeoDataFrame, active
    )
    assert_object(df.reindex(index=[0, 1, 20], columns=["value1"]), pd.DataFrame)
178 | ||
179 | ||
def test_drop(df):
    """``drop`` demotes to DataFrame once no geometry columns remain."""
    active = df.geometry.name
    assert_object(df.drop(columns=[active, "geometry2"]), pd.DataFrame)
    assert_object(df.drop(columns=["value1", "value2"]), GeoDataFrame, active)
    assert_object(
        df.drop(columns=["value1", "value2", "geometry2"]), GeoDataFrame, active
    )
    # dropping only the active geometry leaves the geometry column unset
    assert_object(df.drop(columns=[active, "value2"]), GeoDataFrame, None)
    assert_object(df.drop(columns=["value1", "value2", active]), GeoDataFrame, None)
    assert_object(df.drop(columns=["geometry2", "value2", active]), pd.DataFrame)
189 | ||
190 | ||
def test_apply(df):
    """``apply`` preserves Geo* subclasses when the result is still geometry."""
    active = df.geometry.name

    def echo(x):
        return x

    # axis = 0
    assert_object(df[["value1", "value2"]].apply(echo), pd.DataFrame)
    assert_object(df[[active, "geometry2"]].apply(echo), GeoDataFrame, active)
    assert_object(df[[active]].apply(echo), GeoDataFrame, active)
    assert_object(df[["geometry2", "value1"]].apply(echo), GeoDataFrame, None, None)
    assert_object(df[["geometry2"]].apply(echo), GeoDataFrame, None, None)
    assert_object(df[["value1"]].apply(echo), pd.DataFrame)

    # axis = 0, Series
    assert_object(df[active].apply(echo), GeoSeries, active)
    assert_object(df["geometry2"].apply(echo), GeoSeries, "geometry2", crs=crs_osgb)
    assert_object(df["value1"].apply(echo), pd.Series)

    # axis = 0, Series, no longer geometry
    assert_object(df[active].apply(lambda geom: str(geom)), pd.Series)
    assert_object(df["geometry2"].apply(lambda geom: str(geom)), pd.Series)

    # axis = 1
    assert_object(df[["value1", "value2"]].apply(echo, axis=1), pd.DataFrame)
    assert_object(
        df[[active, "geometry2"]].apply(echo, axis=1), GeoDataFrame, active
    )
    assert_object(df[[active]].apply(echo, axis=1), GeoDataFrame, active)
    # TODO below should be a GeoDataFrame to be consistent with new getitem logic
    # leave as follow up as quite complicated
    # FrameColumnApply.series_generator returns object dtypes Series, so will have
    # patch result of apply
    assert_object(df[["geometry2", "value1"]].apply(echo, axis=1), pd.DataFrame)

    assert_object(df[["value1"]].apply(echo, axis=1), pd.DataFrame)
227 | ||
228 | ||
@pytest.mark.xfail(not compat.PANDAS_GE_11, reason="apply is different in pandas 1.0.5")
def test_apply_axis1_secondary_geo_cols(df):
    # note #GH2436 would also fix this
    result = df[["geometry2"]].apply(lambda row: row, axis=1)
    assert_object(result, GeoDataFrame, None, None)
236 | ||
237 | ||
def test_expanddim_in_apply():
    # https://github.com/geopandas/geopandas/pull/2296#issuecomment-1021966443
    pts = GeoSeries.from_xy([0, 1], [0, 1])
    # expanding each point to an (x, y) row must give a plain DataFrame
    coords = pts.apply(lambda p: pd.Series([p.x, p.y]))
    assert_object(coords, pd.DataFrame)
243 | ||
244 | ||
@pytest.mark.xfail(
    not compat.PANDAS_GE_11,
    reason="pandas <1.1 don't preserve subclass through groupby ops",  # Pandas GH33884
)
def test_expandim_in_groupby_aggregate_multiple_funcs():
    # https://github.com/geopandas/geopandas/pull/2296#issuecomment-1021966443
    # There are two calls to _constructor_expanddim here
    # SeriesGroupBy._aggregate_multiple_funcs() and
    # SeriesGroupBy._wrap_series_output() len(output) > 1

    s = GeoSeries.from_xy([0, 1, 2], [0, 1, 3])

    # NOTE: function names become result column labels, so they are kept as-is
    def union(s):
        return s.unary_union

    def total_area(s):
        return s.area.sum()

    gb = s.groupby([0, 1, 0])
    # mixed geometry/scalar aggregations -> GeoDataFrame with unset geometry
    res1 = gb.agg([total_area, union])
    assert_object(res1, GeoDataFrame, None, None, check_none_name=True)
    res2 = gb.agg([union, total_area])
    assert_object(res2, GeoDataFrame, None, None, check_none_name=True)
    # scalar-only aggregations stay plain DataFrames
    assert_object(gb.agg([total_area, total_area]), pd.DataFrame)
    assert_object(gb.agg([total_area]), pd.DataFrame)
270 | ||
271 | ||
@pytest.mark.xfail(
    not compat.PANDAS_GE_11,
    reason="pandas <1.1 uses concat([Series]) in unstack",  # Pandas GH33356
)
def test_expanddim_in_unstack():
    # https://github.com/geopandas/geopandas/pull/2296#issuecomment-1021966443
    midx = pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "a")])
    s = GeoSeries.from_xy([0, 1, 2], [0, 1, 3], index=midx)
    unstack = s.unstack()
    assert_object(unstack, GeoDataFrame, None, None, False)

    if compat.PANDAS_GE_12:
        assert unstack._geometry_column_name is None
    else:  # pandas GH37369, unstack doesn't call finalize
        assert unstack._geometry_column_name == "geometry"
290 | ||
291 | ||
292 | # indexing / constructor_sliced tests | |
293 | ||
# Column subsets used to parametrize the constructor-sliced indexing tests.
test_case_column_sets = [
    ["geometry"],
    ["geometry2"],
    ["geometry", "geometry2"],
    # non active geo col case
    ["geometry", "value"],
    ["geometry", "value_nan"],
    ["geometry2", "value"],
    ["geometry2", "value_nan"],
]
304 | ||
305 | ||
@pytest.mark.parametrize(
    "column_set",
    test_case_column_sets,
    ids=[", ".join(i) for i in test_case_column_sets],
)
def test_constructor_sliced_row_slices(df2, column_set):
    # https://github.com/geopandas/geopandas/issues/2282
    subset = df2[column_set]
    assert isinstance(subset, GeoDataFrame)
    row = subset.loc[0]
    # row slices shouldn't be GeoSeries, even if they have a geometry col
    assert type(row) == pd.Series
    if "geometry" in column_set:
        assert not isinstance(row.geometry, pd.Series)
        assert row.geometry == Point(0, 0)
321 | ||
322 | ||
def test_constructor_sliced_column_slices(df2):
    # Note loc doesn't use _constructor_sliced so it's not tested here
    geo_idx = df2.columns.get_loc("geometry")

    # column slices should be GeoSeries if of geometry type
    for n_rows, row_positions in [(1, [0]), (2, [0, 1])]:
        head = df2.head(n_rows)
        assert type(head.iloc[:, geo_idx]) == GeoSeries
        assert type(head.iloc[row_positions, geo_idx]) == GeoSeries

    # check iloc row slices are pd.Series instead
    assert type(df2.iloc[0, :]) == pd.Series
336 | ||
337 | ||
def test_constructor_sliced_in_pandas_methods(df2):
    # constructor sliced is used in many places, checking a sample of non
    # geometry cases are sensible
    assert type(df2.count()) == pd.Series
    # drop the secondary geometry columns as not hashable
    without_extra_geoms = df2.drop(columns=["geometry2", "geometry3"])
    assert type(without_extra_geoms.duplicated()) == pd.Series
    assert type(df2.quantile()) == pd.Series
    assert type(df2.memory_usage()) == pd.Series
0 | 0 | import os |
1 | from distutils.version import LooseVersion | |
1 | from packaging.version import Version | |
2 | 2 | |
3 | 3 | import numpy as np |
4 | 4 | import pandas as pd |
5 | 5 | |
6 | 6 | from shapely.geometry import Point, Polygon, LineString, GeometryCollection, box |
7 | from fiona.errors import DriverError | |
8 | 7 | |
9 | 8 | import geopandas |
10 | 9 | from geopandas import GeoDataFrame, GeoSeries, overlay, read_file |
13 | 12 | from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal |
14 | 13 | import pytest |
15 | 14 | |
15 | try: | |
16 | from fiona.errors import DriverError | |
17 | except ImportError: | |
18 | ||
19 | class DriverError(Exception): | |
20 | pass | |
21 | ||
22 | ||
16 | 23 | DATA = os.path.join(os.path.abspath(os.path.dirname(__file__)), "data", "overlay") |
17 | 24 | |
18 | 25 | |
19 | 26 | pytestmark = pytest.mark.skip_no_sindex |
20 | pandas_133 = pd.__version__ == LooseVersion("1.3.3") | |
27 | pandas_133 = Version(pd.__version__) == Version("1.3.3") | |
21 | 28 | |
22 | 29 | |
23 | 30 | @pytest.fixture |
80 | 87 | os.path.join(DATA, "polys", "df1_df2-{0}.geojson".format(name)) |
81 | 88 | ) |
82 | 89 | expected.crs = None |
90 | for col in expected.columns[expected.dtypes == "int32"]: | |
91 | expected[col] = expected[col].astype("int64") | |
83 | 92 | return expected |
84 | 93 | |
85 | 94 | if how == "identity": |
528 | 537 | except OSError: # fiona < 1.8 |
529 | 538 | assert result.empty |
530 | 539 | |
540 | except RuntimeError: # pyogrio.DataSourceError | |
541 | assert result.empty | |
542 | ||
531 | 543 | |
532 | 544 | def test_mixed_geom_error(): |
533 | 545 | polys1 = GeoSeries( |
794 | 806 | expected = GeoDataFrame(columns=["foo", "bar", "geometry"]) |
795 | 807 | result = overlay(gdf1, gdf2, how="intersection") |
796 | 808 | assert_geodataframe_equal(result, expected, check_index_type=False) |
809 | ||
810 | ||
class TestOverlayWikiExample:
    """Overlay set operations on two overlapping boxes, checked piecewise."""

    def setup_method(self):
        # the two pieces unique to each layer plus the shared intersection
        only_a = Polygon([(4, 2), (0, 2), (0, 6), (6, 6), (6, 4), (4, 4), (4, 2)])
        only_b = Polygon([(10, 0), (4, 0), (4, 2), (6, 2), (6, 4), (10, 4), (10, 0)])
        shared = box(4, 2, 6, 4)

        self.layer_a = GeoDataFrame(geometry=[box(0, 2, 6, 6)])
        self.layer_b = GeoDataFrame(geometry=[box(4, 0, 10, 4)])

        self.intersection = GeoDataFrame(geometry=[shared])
        self.union = GeoDataFrame(geometry=[shared, only_a, only_b])
        self.a_difference_b = GeoDataFrame(geometry=[only_a])
        self.b_difference_a = GeoDataFrame(geometry=[only_b])
        self.symmetric_difference = GeoDataFrame(geometry=[only_a, only_b])
        self.a_identity_b = GeoDataFrame(geometry=[shared, only_a])
        self.b_identity_a = GeoDataFrame(geometry=[shared, only_b])

    def test_intersection(self):
        res = overlay(self.layer_a, self.layer_b, how="intersection")
        assert res.geom_equals(self.intersection).bool()

    def test_union(self):
        res = overlay(self.layer_a, self.layer_b, how="union")
        assert_geodataframe_equal(res, self.union)

    def test_a_difference_b(self):
        res = overlay(self.layer_a, self.layer_b, how="difference")
        assert_geodataframe_equal(res, self.a_difference_b)

    def test_b_difference_a(self):
        res = overlay(self.layer_b, self.layer_a, how="difference")
        assert_geodataframe_equal(res, self.b_difference_a)

    def test_symmetric_difference(self):
        res = overlay(self.layer_a, self.layer_b, how="symmetric_difference")
        assert_geodataframe_equal(res, self.symmetric_difference)

    def test_a_identity_b(self):
        res = overlay(self.layer_a, self.layer_b, how="identity")
        assert_geodataframe_equal(res, self.a_identity_b)

    def test_b_identity_a(self):
        res = overlay(self.layer_b, self.layer_a, how="identity")
        assert_geodataframe_equal(res, self.b_identity_a)
87 | 87 | |
88 | 88 | def test_indexing(s, df): |
89 | 89 | |
90 | # accessing scalar from the geometry (colunm) | |
90 | # accessing scalar from the geometry (column) | |
91 | 91 | exp = Point(1, 1) |
92 | 92 | assert s[1] == exp |
93 | 93 | assert s.loc[1] == exp |
138 | 138 | assert isinstance(res.geometry, GeoSeries) |
139 | 139 | assert_frame_equal(res, df[["value1", "geometry"]]) |
140 | 140 | |
141 | # TODO df.reindex(columns=['value1', 'value2']) still returns GeoDataFrame, | |
142 | # should it return DataFrame instead ? | |
141 | res = df.reindex(columns=["value1", "value2"]) | |
142 | assert type(res) == pd.DataFrame | |
143 | assert_frame_equal(res, df[["value1", "value2"]]) | |
143 | 144 | |
144 | 145 | |
145 | 146 | def test_take(s, df): |
240 | 241 | res = df.astype({"value1": float}) |
241 | 242 | assert isinstance(res, GeoDataFrame) |
242 | 243 | |
243 | # check whether returned object is a datafrane | |
244 | # check whether returned object is a dataframe | |
244 | 245 | res = df.astype(str) |
245 | 246 | assert isinstance(res, pd.DataFrame) and not isinstance(res, GeoDataFrame) |
246 | 247 | |
261 | 262 | assert res["a"].dtype == object |
262 | 263 | |
263 | 264 | |
264 | @pytest.mark.xfail( | |
265 | not compat.PANDAS_GE_10, | |
266 | reason="Convert dtypes new in pandas 1.0", | |
267 | raises=NotImplementedError, | |
268 | ) | |
269 | 265 | def test_convert_dtypes(df): |
270 | 266 | # https://github.com/geopandas/geopandas/issues/1870 |
271 | 267 | |
272 | 268 | # Test geometry col is first col, first, geom_col_name=geometry |
273 | 269 | # (order is important in concat, used internally) |
274 | res1 = df.convert_dtypes() # note res1 done first for pandas < 1 xfail check | |
270 | res1 = df.convert_dtypes() | |
275 | 271 | |
276 | 272 | expected1 = GeoDataFrame( |
277 | 273 | pd.DataFrame(df).convert_dtypes(), crs=df.crs, geometry=df.geometry.name |
382 | 378 | df2["geometry"] = s2 |
383 | 379 | res = df2.fillna(Point(1, 1)) |
384 | 380 | assert_geodataframe_equal(res, df) |
385 | with pytest.raises(NotImplementedError): | |
381 | with pytest.raises((NotImplementedError, TypeError)): # GH2351 | |
386 | 382 | df2.fillna(0) |
387 | 383 | |
388 | 384 | # allow non-geometry fill value if there are no missing values |
443 | 439 | assert_array_equal(s.unique(), exp) |
444 | 440 | |
445 | 441 | |
def pd14_compat_index(index):
    """Build the expected value_counts index for the running pandas version.

    On pandas >= 1.4 the expected index is a geometry array; on older
    versions the raw list of geometries is used as-is.
    """
    return from_shapely(index) if compat.PANDAS_GE_14 else index
447 | ||
448 | ||
446 | 449 | def test_value_counts(): |
447 | 450 | # each object is considered unique |
448 | 451 | s = GeoSeries([Point(0, 0), Point(1, 1), Point(0, 0)]) |
449 | 452 | res = s.value_counts() |
450 | 453 | with compat.ignore_shapely2_warnings(): |
451 | exp = pd.Series([2, 1], index=[Point(0, 0), Point(1, 1)]) | |
454 | exp = pd.Series([2, 1], index=pd14_compat_index([Point(0, 0), Point(1, 1)])) | |
452 | 455 | assert_series_equal(res, exp) |
453 | 456 | # Check crs doesn't make a difference - note it is not kept in output index anyway |
454 | 457 | s2 = GeoSeries([Point(0, 0), Point(1, 1), Point(0, 0)], crs="EPSG:4326") |
455 | 458 | res2 = s2.value_counts() |
456 | 459 | assert_series_equal(res2, exp) |
460 | if compat.PANDAS_GE_14: | |
461 | # TODO should/ can we fix CRS being lost | |
462 | assert s2.value_counts().index.array.crs is None | |
457 | 463 | |
458 | 464 | # check mixed geometry |
459 | 465 | s3 = GeoSeries([Point(0, 0), LineString([[1, 1], [2, 2]]), Point(0, 0)]) |
460 | 466 | res3 = s3.value_counts() |
467 | index = pd14_compat_index([Point(0, 0), LineString([[1, 1], [2, 2]])]) | |
461 | 468 | with compat.ignore_shapely2_warnings(): |
462 | exp3 = pd.Series([2, 1], index=[Point(0, 0), LineString([[1, 1], [2, 2]])]) | |
469 | exp3 = pd.Series([2, 1], index=index) | |
463 | 470 | assert_series_equal(res3, exp3) |
464 | 471 | |
465 | 472 | # check None is handled |
466 | 473 | s4 = GeoSeries([Point(0, 0), None, Point(0, 0)]) |
467 | 474 | res4 = s4.value_counts(dropna=True) |
468 | 475 | with compat.ignore_shapely2_warnings(): |
469 | exp4_dropna = pd.Series([2], index=[Point(0, 0)]) | |
476 | exp4_dropna = pd.Series([2], index=pd14_compat_index([Point(0, 0)])) | |
470 | 477 | assert_series_equal(res4, exp4_dropna) |
471 | 478 | with compat.ignore_shapely2_warnings(): |
472 | exp4_keepna = pd.Series([2, 1], index=[Point(0, 0), None]) | |
479 | exp4_keepna = pd.Series([2, 1], index=pd14_compat_index([Point(0, 0), None])) | |
473 | 480 | res4_keepna = s4.value_counts(dropna=False) |
474 | 481 | assert_series_equal(res4_keepna, exp4_keepna) |
475 | 482 | |
545 | 552 | assert_frame_equal(res, exp) |
546 | 553 | |
547 | 554 | |
@pytest.mark.skip_no_sindex
@pytest.mark.skipif(
    compat.PANDAS_GE_13 and not compat.PANDAS_GE_14,
    reason="this was broken in pandas 1.3.5 (GH-2294)",
)
@pytest.mark.parametrize("crs", [None, "EPSG:4326"])
def test_groupby_metadata(crs):
    # https://github.com/geopandas/geopandas/issues/2294
    gdf = GeoDataFrame(
        {
            "geometry": [Point(0, 0), Point(1, 1), Point(0, 0)],
            "value1": np.arange(3, dtype="int64"),
            "value2": np.array([1, 2, 1], dtype="int64"),
        },
        crs=crs,
    )

    def check_group(group):
        # dummy check asserting we can access the crs inside groupby.apply
        assert isinstance(group, GeoDataFrame)
        assert group.crs == crs

    gdf.groupby("value2").apply(check_group)

    # actual test with functionality
    res = gdf.groupby("value2").apply(
        lambda x: geopandas.sjoin(x, x[["geometry", "value1"]], how="inner")
    )

    expected = (
        gdf.take([0, 2, 0, 2, 1])
        .set_index("value2", drop=False, append=True)
        .swaplevel()
        .rename(columns={"value1": "value1_left"})
        .assign(value1_right=[0, 0, 2, 2, 1])
    )
    assert_geodataframe_equal(res.drop(columns=["index_right"]), expected)
592 | ||
593 | ||
548 | 594 | def test_apply(s): |
549 | 595 | # function that returns geometry preserves GeoSeries class |
550 | 596 | def geom_func(geom): |
586 | 632 | if crs: |
587 | 633 | df = df.set_crs(crs) |
588 | 634 | result = df.apply(lambda col: col.astype(str), axis=0) |
589 | # TODO this should actually not return a GeoDataFrame | |
590 | assert isinstance(result, GeoDataFrame) | |
635 | assert type(result) is pd.DataFrame | |
591 | 636 | expected = df.astype(str) |
592 | 637 | assert_frame_equal(result, expected) |
593 | 638 | |
594 | 639 | result = df.apply(lambda col: col.astype(str), axis=1) |
595 | assert isinstance(result, GeoDataFrame) | |
640 | assert type(result) is pd.DataFrame | |
596 | 641 | assert_frame_equal(result, expected) |
597 | 642 | |
598 | 643 | |
602 | 647 | assert result.geometry.name == "geom" |
603 | 648 | |
604 | 649 | |
605 | @pytest.mark.skipif(not compat.PANDAS_GE_10, reason="attrs introduced in pandas 1.0") | |
def test_df_apply_returning_series(df):
    # https://github.com/geopandas/geopandas/issues/2283
    # row-wise apply returning geometries stays a GeoSeries
    row_geoms = df.apply(lambda row: row.geometry, axis=1)
    assert_geoseries_equal(row_geoms, df.geometry, check_crs=False)

    # row-wise apply returning scalars is a plain (unnamed) Series
    row_values = df.apply(lambda row: row.value1, axis=1)
    assert_series_equal(row_values, df["value1"].rename(None))
657 | ||
658 | ||
606 | 659 | def test_preserve_attrs(df): |
607 | 660 | # https://github.com/geopandas/geopandas/issues/1654 |
608 | 661 | df.attrs["name"] = "my_name" |
0 | from distutils.version import LooseVersion | |
0 | from packaging.version import Version | |
1 | 1 | import itertools |
2 | 2 | import warnings |
3 | 3 | |
15 | 15 | MultiPoint, |
16 | 16 | MultiLineString, |
17 | 17 | GeometryCollection, |
18 | box, | |
18 | 19 | ) |
19 | 20 | |
20 | 21 | |
32 | 33 | try: # skipif and importorskip do not work for decorators |
33 | 34 | from matplotlib.testing.decorators import check_figures_equal |
34 | 35 | |
35 | if matplotlib.__version__ >= LooseVersion("3.3.0"): | |
36 | if Version(matplotlib.__version__) >= Version("3.3.0"): | |
36 | 37 | |
37 | 38 | MPL_DECORATORS = True |
38 | 39 | else: |
302 | 303 | with pytest.warns(UserWarning): |
303 | 304 | ax = s.plot() |
304 | 305 | assert len(ax.collections) == 0 |
305 | df = GeoDataFrame([]) | |
306 | df = GeoDataFrame([], columns=["geometry"]) | |
306 | 307 | with pytest.warns(UserWarning): |
307 | 308 | ax = df.plot() |
308 | 309 | assert len(ax.collections) == 0 |
403 | 404 | ): |
404 | 405 | self.df.plot(column="cats", categories=["cat1"]) |
405 | 406 | |
406 | def test_misssing(self): | |
407 | def test_missing(self): | |
407 | 408 | self.df.loc[0, "values"] = np.nan |
408 | 409 | ax = self.df.plot("values") |
409 | 410 | cmap = plt.get_cmap() |
426 | 427 | leg_colors1 = ax.get_legend().axes.collections[1].get_facecolors() |
427 | 428 | np.testing.assert_array_equal(point_colors[0], leg_colors[0]) |
428 | 429 | np.testing.assert_array_equal(nan_color[0], leg_colors1[0]) |
430 | ||
431 | def test_no_missing_and_missing_kwds(self): | |
432 | # GH2210 | |
433 | df = self.df.copy() | |
434 | df["category"] = df["values"].astype("str") | |
435 | df.plot("category", missing_kwds={"facecolor": "none"}, legend=True) | |
429 | 436 | |
430 | 437 | |
431 | 438 | class TestPointZPlotting: |
862 | 869 | def test_plot(self): |
863 | 870 | # basic test that points with z coords don't break plotting |
864 | 871 | self.df.plot() |
872 | ||
873 | ||
class TestColorParamArray:
    """Plotting with a per-geometry array supplied via the ``color`` kwarg."""

    def setup_method(self):
        geoms = []
        colors = []
        for lo, hi in [(0, 2), (4, 6)]:
            square = box(lo, lo, hi, hi)
            # one polygon, one ring and one point per square
            geoms.extend([square, square.buffer(0.8).exterior, square.centroid])
            colors.extend(["red", "green", "blue"])

        self.gdf = GeoDataFrame({"geometry": geoms, "color_rgba": colors})
        self.mgdf = self.gdf.dissolve(self.gdf.type)

    def _assert_collection_colors(self, ax):
        # shared expectation for single- and multi-part plots
        _check_colors(
            4,
            np.concatenate([c.get_edgecolor() for c in ax.collections]),
            ["green"] * 2 + ["blue"] * 2,
        )
        _check_colors(
            4,
            np.concatenate([c.get_facecolor() for c in ax.collections]),
            ["red"] * 2 + ["blue"] * 2,
        )

    def test_color_single(self):
        ax = self.gdf.plot(color=self.gdf["color_rgba"])
        self._assert_collection_colors(ax)

    def test_color_multi(self):
        ax = self.mgdf.plot(color=self.mgdf["color_rgba"])
        self._assert_collection_colors(ax)
865 | 913 | |
866 | 914 | |
867 | 915 | class TestGeometryCollectionPlotting: |
1063 | 1111 | import mapclassify # noqa |
1064 | 1112 | except ImportError: |
1065 | 1113 | pytest.importorskip("mapclassify") |
1114 | cls.mc = mapclassify | |
1066 | 1115 | cls.classifiers = list(mapclassify.classifiers.CLASSIFIERS) |
1067 | 1116 | cls.classifiers.remove("UserDefined") |
1068 | 1117 | pth = get_path("naturalearth_lowres") |
1083 | 1132 | ) |
1084 | 1133 | labels = [t.get_text() for t in ax.get_legend().get_texts()] |
1085 | 1134 | expected = [ |
1086 | u" 140.00, 5217064.00", | |
1087 | u" 5217064.00, 19532732.33", | |
1088 | u" 19532732.33, 1379302771.00", | |
1135 | s.split("|")[0][1:-2] | |
1136 | for s in str(self.mc.Quantiles(self.df["pop_est"], k=3)).split("\n")[4:] | |
1089 | 1137 | ] |
1090 | 1138 | assert labels == expected |
1091 | 1139 | |
1118 | 1166 | column="NEGATIVES", scheme="FISHER_JENKS", k=3, cmap="OrRd", legend=True |
1119 | 1167 | ) |
1120 | 1168 | labels = [t.get_text() for t in ax.get_legend().get_texts()] |
1121 | expected = [u"-10.00, -3.41", u" -3.41, 3.30", u" 3.30, 10.00"] | |
1169 | expected = ["-10.00, -3.41", " -3.41, 3.30", " 3.30, 10.00"] | |
1122 | 1170 | assert labels == expected |
1123 | 1171 | |
1124 | 1172 | def test_fmt(self): |
1131 | 1179 | legend_kwds={"fmt": "{:.0f}"}, |
1132 | 1180 | ) |
1133 | 1181 | labels = [t.get_text() for t in ax.get_legend().get_texts()] |
1134 | expected = [u"-10, -3", u" -3, 3", u" 3, 10"] | |
1182 | expected = ["-10, -3", " -3, 3", " 3, 10"] | |
1135 | 1183 | assert labels == expected |
1136 | 1184 | |
1137 | 1185 | def test_interval(self): |
1144 | 1192 | legend_kwds={"interval": True}, |
1145 | 1193 | ) |
1146 | 1194 | labels = [t.get_text() for t in ax.get_legend().get_texts()] |
1147 | expected = [u"[-10.00, -3.41]", u"( -3.41, 3.30]", u"( 3.30, 10.00]"] | |
1195 | expected = ["[-10.00, -3.41]", "( -3.41, 3.30]", "( 3.30, 10.00]"] | |
1148 | 1196 | assert labels == expected |
1149 | 1197 | |
1150 | 1198 | @pytest.mark.parametrize("scheme", ["FISHER_JENKS", "FISHERJENKS"]) |
1167 | 1215 | legend=True, |
1168 | 1216 | ) |
1169 | 1217 | labels = [t.get_text() for t in ax.get_legend().get_texts()] |
1170 | expected = [" 140.00, 9961396.00", " 9961396.00, 1379302771.00"] | |
1218 | expected = [ | |
1219 | s.split("|")[0][1:-2] | |
1220 | for s in str(self.mc.Percentiles(self.df["pop_est"], pct=[50, 100])).split( | |
1221 | "\n" | |
1222 | )[4:] | |
1223 | ] | |
1224 | ||
1171 | 1225 | assert labels == expected |
1172 | 1226 | |
1173 | 1227 | def test_invalid_scheme(self): |
1377 | 1431 | ) |
1378 | 1432 | |
1379 | 1433 | def test_points(self): |
1380 | # failing with matplotlib 1.4.3 (edge stays black even when specified) | |
1381 | pytest.importorskip("matplotlib", "1.5.0") | |
1382 | ||
1383 | 1434 | from geopandas.plotting import _plot_point_collection, plot_point_collection |
1384 | 1435 | from matplotlib.collections import PathCollection |
1385 | 1436 | |
1434 | 1485 | with pytest.raises((TypeError, ValueError)): |
1435 | 1486 | _plot_point_collection(ax, self.points, color="not color") |
1436 | 1487 | |
1437 | # check DeprecationWarning | |
1438 | with pytest.warns(DeprecationWarning): | |
1488 | # check FutureWarning | |
1489 | with pytest.warns(FutureWarning): | |
1439 | 1490 | plot_point_collection(ax, self.points) |
1440 | 1491 | |
1441 | 1492 | def test_points_values(self): |
1507 | 1558 | # not a color |
1508 | 1559 | with pytest.raises((TypeError, ValueError)): |
1509 | 1560 | _plot_linestring_collection(ax, self.lines, color="not color") |
1510 | # check DeprecationWarning | |
1511 | with pytest.warns(DeprecationWarning): | |
1561 | # check FutureWarning | |
1562 | with pytest.warns(FutureWarning): | |
1512 | 1563 | plot_linestring_collection(ax, self.lines) |
1513 | 1564 | |
1514 | 1565 | def test_linestrings_values(self): |
1599 | 1650 | # not a color |
1600 | 1651 | with pytest.raises((TypeError, ValueError)): |
1601 | 1652 | _plot_polygon_collection(ax, self.polygons, color="not color") |
1602 | # check DeprecationWarning | |
1603 | with pytest.warns(DeprecationWarning): | |
1653 | # check FutureWarning | |
1654 | with pytest.warns(FutureWarning): | |
1604 | 1655 | plot_polygon_collection(ax, self.polygons) |
1605 | 1656 | |
1606 | 1657 | def test_polygons_values(self): |
1836 | 1887 | Previously, we did `fig.axes[1]`, but in matplotlib 3.4 the order switched |
1837 | 1888 | and the colorbar ax was first and subplot ax second. |
1838 | 1889 | """ |
1839 | if matplotlib.__version__ < LooseVersion("3.0.0"): | |
1840 | if label == "<colorbar>": | |
1841 | return fig.axes[1] | |
1842 | elif label == "": | |
1843 | return fig.axes[0] | |
1844 | 1890 | for ax in fig.axes: |
1845 | 1891 | if ax.get_label() == label: |
1846 | 1892 | return ax |
172 | 172 | assert subset1.sindex is original_index |
173 | 173 | subset2 = self.df[["A", "geom"]] |
174 | 174 | assert subset2.sindex is original_index |
175 | ||
176 | def test_rebuild_on_update_inplace(self): | |
177 | gdf = self.df.copy() | |
178 | old_sindex = gdf.sindex | |
179 | # sorting in place | |
180 | gdf.sort_values("A", ascending=False, inplace=True) | |
181 | # spatial index should be invalidated | |
182 | assert not gdf.has_sindex | |
183 | new_sindex = gdf.sindex | |
184 | # and should be different | |
185 | assert new_sindex is not old_sindex | |
186 | ||
187 | # sorting should still have happened though | |
188 | assert gdf.index.tolist() == [4, 3, 2, 1, 0] | |
189 | ||
190 | @pytest.mark.skipif(not compat.PANDAS_GE_11, reason="fails on pd<1.1.0") | |
191 | def test_update_inplace_no_rebuild(self): | |
192 | gdf = self.df.copy() | |
193 | old_sindex = gdf.sindex | |
194 | gdf.rename(columns={"A": "AA"}, inplace=True) | |
195 | # a rename shouldn't invalidate the index | |
196 | assert gdf.has_sindex | |
197 | # and the "new" should be the same | |
198 | new_sindex = gdf.sindex | |
199 | assert old_sindex is new_sindex | |
175 | 200 | |
176 | 201 | |
177 | 202 | # Skip to accommodate Shapely geometries being unhashable |
0 | from .crs import explicit_crs_from_epsg | |
1 | 0 | from .geocoding import geocode, reverse_geocode |
2 | 1 | from .overlay import overlay |
3 | 2 | from .sjoin import sjoin, sjoin_nearest |
6 | 5 | |
7 | 6 | __all__ = [ |
8 | 7 | "collect", |
9 | "explicit_crs_from_epsg", | |
10 | 8 | "geocode", |
11 | 9 | "overlay", |
12 | 10 | "reverse_geocode", |
6 | 6 | """ |
7 | 7 | import warnings |
8 | 8 | |
9 | from shapely.geometry import Polygon, MultiPolygon | |
9 | import pandas.api.types | |
10 | from shapely.geometry import Polygon, MultiPolygon, box | |
10 | 11 | |
11 | 12 | from geopandas import GeoDataFrame, GeoSeries |
12 | 13 | from geopandas.array import _check_crs, _crs_mismatch_warn |
13 | 14 | |
14 | 15 | |
15 | def _clip_gdf_with_polygon(gdf, poly): | |
16 | """Clip geometry to the polygon extent. | |
17 | ||
18 | Clip an input GeoDataFrame to the polygon extent of the poly | |
16 | def _mask_is_list_like_rectangle(mask): | |
17 | return pandas.api.types.is_list_like(mask) and not isinstance( | |
18 | mask, (GeoDataFrame, GeoSeries, Polygon, MultiPolygon) | |
19 | ) | |
20 | ||
21 | ||
22 | def _clip_gdf_with_mask(gdf, mask): | |
23 | """Clip geometry to the polygon/rectangle extent. | |
24 | ||
25 | Clip an input GeoDataFrame to the polygon extent of the polygon | |
19 | 26 | parameter. |
20 | 27 | |
21 | 28 | Parameters |
23 | 30 | gdf : GeoDataFrame, GeoSeries |
24 | 31 | Dataframe to clip. |
25 | 32 | |
26 | poly : (Multi)Polygon | |
27 | Reference polygon for clipping. | |
33 | mask : (Multi)Polygon, list-like | |
34 | Reference polygon/rectangle for clipping. | |
28 | 35 | |
29 | 36 | Returns |
30 | 37 | ------- |
31 | 38 | GeoDataFrame |
32 | 39 | The returned GeoDataFrame is a clipped subset of gdf |
33 | that intersects with poly. | |
40 | that intersects with polygon/rectangle. | |
34 | 41 | """ |
35 | gdf_sub = gdf.iloc[gdf.sindex.query(poly, predicate="intersects")] | |
42 | clipping_by_rectangle = _mask_is_list_like_rectangle(mask) | |
43 | if clipping_by_rectangle: | |
44 | intersection_polygon = box(*mask) | |
45 | else: | |
46 | intersection_polygon = mask | |
47 | ||
48 | gdf_sub = gdf.iloc[gdf.sindex.query(intersection_polygon, predicate="intersects")] | |
36 | 49 | |
37 | 50 | # For performance reasons points don't need to be intersected with poly |
38 | 51 | non_point_mask = gdf_sub.geom_type != "Point" |
44 | 57 | # Clip the data with the polygon |
45 | 58 | if isinstance(gdf_sub, GeoDataFrame): |
46 | 59 | clipped = gdf_sub.copy() |
47 | clipped.loc[ | |
48 | non_point_mask, clipped._geometry_column_name | |
49 | ] = gdf_sub.geometry.values[non_point_mask].intersection(poly) | |
60 | if clipping_by_rectangle: | |
61 | clipped.loc[ | |
62 | non_point_mask, clipped._geometry_column_name | |
63 | ] = gdf_sub.geometry.values[non_point_mask].clip_by_rect(*mask) | |
64 | else: | |
65 | clipped.loc[ | |
66 | non_point_mask, clipped._geometry_column_name | |
67 | ] = gdf_sub.geometry.values[non_point_mask].intersection(mask) | |
50 | 68 | else: |
51 | 69 | # GeoSeries |
52 | 70 | clipped = gdf_sub.copy() |
53 | clipped[non_point_mask] = gdf_sub.values[non_point_mask].intersection(poly) | |
54 | ||
71 | if clipping_by_rectangle: | |
72 | clipped[non_point_mask] = gdf_sub.values[non_point_mask].clip_by_rect(*mask) | |
73 | else: | |
74 | clipped[non_point_mask] = gdf_sub.values[non_point_mask].intersection(mask) | |
75 | ||
76 | if clipping_by_rectangle: | |
77 | # clip_by_rect might return empty geometry collections in edge cases | |
78 | clipped = clipped[~clipped.is_empty] | |
55 | 79 | return clipped |
56 | 80 | |
57 | 81 | |
59 | 83 | """Clip points, lines, or polygon geometries to the mask extent. |
60 | 84 | |
61 | 85 | Both layers must be in the same Coordinate Reference System (CRS). |
62 | The `gdf` will be clipped to the full extent of the clip object. | |
63 | ||
64 | If there are multiple polygons in mask, data from `gdf` will be | |
86 | The ``gdf`` will be clipped to the full extent of the clip object. | |
87 | ||
88 | If there are multiple polygons in mask, data from ``gdf`` will be | |
65 | 89 | clipped to the total boundary of all polygons in mask. |
90 | ||
91 | If the ``mask`` is list-like with four elements ``(minx, miny, maxx, maxy)``, a | |
92 | faster rectangle clipping algorithm will be used. Note that this can lead to | |
93 | slightly different results in edge cases, e.g. if a line would be reduced to a | |
94 | point, this point might not be returned. | |
95 | The geometry is clipped in a fast but possibly dirty way. The output is not | |
96 | guaranteed to be valid. No exceptions will be raised for topological errors. | |
66 | 97 | |
67 | 98 | Parameters |
68 | 99 | ---------- |
69 | 100 | gdf : GeoDataFrame or GeoSeries |
70 | 101 | Vector layer (point, line, polygon) to be clipped to mask. |
71 | mask : GeoDataFrame, GeoSeries, (Multi)Polygon | |
72 | Polygon vector layer used to clip `gdf`. | |
102 | mask : GeoDataFrame, GeoSeries, (Multi)Polygon, list-like | |
103 | Polygon vector layer used to clip ``gdf``. | |
73 | 104 | The mask's geometry is dissolved into one geometric feature |
74 | and intersected with `gdf`. | |
105 | and intersected with ``gdf``. | |
106 | If the mask is list-like with four elements ``(minx, miny, maxx, maxy)``, | |
107 | ``clip`` will use a faster rectangle clipping (:meth:`~GeoSeries.clip_by_rect`), | |
108 | possibly leading to slightly different results. | |
75 | 109 | keep_geom_type : boolean, default False |
76 | 110 | If True, return only geometries of original type in case of intersection |
77 | 111 | resulting in multiple geometry types or GeometryCollections. |
80 | 114 | Returns |
81 | 115 | ------- |
82 | 116 | GeoDataFrame or GeoSeries |
83 | Vector data (points, lines, polygons) from `gdf` clipped to | |
117 | Vector data (points, lines, polygons) from ``gdf`` clipped to | |
84 | 118 | polygon boundary from mask. |
85 | 119 | |
86 | 120 | See also |
109 | 143 | "'gdf' should be GeoDataFrame or GeoSeries, got {}".format(type(gdf)) |
110 | 144 | ) |
111 | 145 | |
112 | if not isinstance(mask, (GeoDataFrame, GeoSeries, Polygon, MultiPolygon)): | |
146 | mask_is_list_like = _mask_is_list_like_rectangle(mask) | |
147 | if ( | |
148 | not isinstance(mask, (GeoDataFrame, GeoSeries, Polygon, MultiPolygon)) | |
149 | and not mask_is_list_like | |
150 | ): | |
113 | 151 | raise TypeError( |
114 | "'mask' should be GeoDataFrame, GeoSeries or" | |
115 | "(Multi)Polygon, got {}".format(type(mask)) | |
152 | "'mask' should be GeoDataFrame, GeoSeries," | |
153 | f"(Multi)Polygon or list-like, got {type(mask)}" | |
154 | ) | |
155 | ||
156 | if mask_is_list_like and len(mask) != 4: | |
157 | raise TypeError( | |
158 | "If 'mask' is list-like, it must have four values (minx, miny, maxx, maxy)" | |
116 | 159 | ) |
117 | 160 | |
118 | 161 | if isinstance(mask, (GeoDataFrame, GeoSeries)): |
121 | 164 | |
122 | 165 | if isinstance(mask, (GeoDataFrame, GeoSeries)): |
123 | 166 | box_mask = mask.total_bounds |
167 | elif mask_is_list_like: | |
168 | box_mask = mask | |
124 | 169 | else: |
125 | 170 | box_mask = mask.bounds |
126 | 171 | box_gdf = gdf.total_bounds |
131 | 176 | return gdf.iloc[:0] |
132 | 177 | |
133 | 178 | if isinstance(mask, (GeoDataFrame, GeoSeries)): |
134 | poly = mask.geometry.unary_union | |
135 | else: | |
136 | poly = mask | |
137 | ||
138 | clipped = _clip_gdf_with_polygon(gdf, poly) | |
179 | combined_mask = mask.geometry.unary_union | |
180 | else: | |
181 | combined_mask = mask | |
182 | ||
183 | clipped = _clip_gdf_with_mask(gdf, combined_mask) | |
139 | 184 | |
140 | 185 | if keep_geom_type: |
141 | 186 | geomcoll_concat = (clipped.geom_type == "GeometryCollection").any() |
0 | import warnings | |
1 | ||
2 | from pyproj import CRS | |
3 | ||
4 | ||
5 | def explicit_crs_from_epsg(crs=None, epsg=None): | |
6 | """ | |
7 | Gets full/explicit CRS from EPSG code provided. | |
8 | ||
9 | Parameters | |
10 | ---------- | |
11 | crs : dict or string, default None | |
12 | An existing crs dict or Proj string with the 'init' key specifying an EPSG code | |
13 | epsg : string or int, default None | |
14 | The EPSG code to lookup | |
15 | """ | |
16 | warnings.warn( | |
17 | "explicit_crs_from_epsg is deprecated. " | |
18 | "You can set the epsg on the GeoDataFrame (gdf) using gdf.crs=epsg", | |
19 | FutureWarning, | |
20 | stacklevel=2, | |
21 | ) | |
22 | if crs is not None: | |
23 | return CRS.from_user_input(crs) | |
24 | elif epsg is not None: | |
25 | return CRS.from_epsg(epsg) | |
26 | raise ValueError("Must pass either crs or epsg.") | |
27 | ||
28 | ||
29 | def epsg_from_crs(crs): | |
30 | """ | |
31 | Returns an epsg code from a crs dict or Proj string. | |
32 | ||
33 | Parameters | |
34 | ---------- | |
35 | crs : dict or string, default None | |
36 | A crs dict or Proj string | |
37 | ||
38 | """ | |
39 | warnings.warn( | |
40 | "epsg_from_crs is deprecated. " | |
41 | "You can get the epsg code from GeoDataFrame (gdf) " | |
42 | "using gdf.crs.to_epsg()", | |
43 | FutureWarning, | |
44 | stacklevel=2, | |
45 | ) | |
46 | crs = CRS.from_user_input(crs) | |
47 | if "init=epsg" in crs.to_string().lower(): | |
48 | epsg_code = crs.to_epsg(0) | |
49 | else: | |
50 | epsg_code = crs.to_epsg() | |
51 | return epsg_code | |
52 | ||
53 | ||
54 | def get_epsg_file_contents(): | |
55 | warnings.warn("get_epsg_file_contents is deprecated.", FutureWarning, stacklevel=2) | |
56 | return "" |
138 | 138 | # keep geometry column last |
139 | 139 | columns = list(dfunion.columns) |
140 | 140 | columns.remove("geometry") |
141 | columns = columns + ["geometry"] | |
141 | columns.append("geometry") | |
142 | 142 | return dfunion.reindex(columns=columns) |
143 | 143 | |
144 | 144 |
106 | 106 | "A non-default value for `predicate` was passed" |
107 | 107 | f' (got `predicate="{predicate}"`' |
108 | 108 | f' in combination with `op="{op}"`).' |
109 | " The value of `predicate` will be overriden by the value of `op`," | |
109 | " The value of `predicate` will be overridden by the value of `op`," | |
110 | 110 | " , which may result in unexpected behavior." |
111 | 111 | f"\n{deprecation_message}" |
112 | 112 | ) |
342 | 342 | ) |
343 | 343 | .set_index(index_right) |
344 | 344 | .drop(["_key_left", "_key_right"], axis=1) |
345 | .set_geometry(right_df.geometry.name) | |
345 | 346 | ) |
346 | 347 | if isinstance(index_right, list): |
347 | 348 | joined.index.names = right_index_name |
415 | 416 | |
416 | 417 | Results will include multiple output records for a single input record |
417 | 418 | where there are multiple equidistant nearest or intersected neighbors. |
419 | ||
420 | Distance is calculated in CRS units and can be returned using the | |
421 | `distance_col` parameter. | |
418 | 422 | |
419 | 423 | See the User Guide page |
420 | 424 | https://geopandas.readthedocs.io/en/latest/docs/user_guide/mergingdata.html |
502 | 506 | |
503 | 507 | Notes |
504 | 508 | ----- |
505 | Since this join relies on distances, results will be innaccurate | |
509 | Since this join relies on distances, results will be inaccurate | |
506 | 510 | if your geometries are in a geographic CRS. |
507 | 511 | |
508 | 512 | Every operation in GeoPandas is planar, i.e. the potential third |
0 | 0 | """Tests for the clip module.""" |
1 | 1 | |
2 | 2 | import warnings |
3 | from distutils.version import LooseVersion | |
3 | from packaging.version import Version | |
4 | 4 | |
5 | 5 | import numpy as np |
6 | 6 | import pandas as pd |
13 | 13 | LinearRing, |
14 | 14 | GeometryCollection, |
15 | 15 | MultiPoint, |
16 | box, | |
16 | 17 | ) |
17 | 18 | |
18 | 19 | import geopandas |
21 | 22 | from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal |
22 | 23 | import pytest |
23 | 24 | |
25 | from geopandas.tools.clip import _mask_is_list_like_rectangle | |
24 | 26 | |
25 | 27 | pytestmark = pytest.mark.skip_no_sindex |
26 | pandas_133 = pd.__version__ == LooseVersion("1.3.3") | |
28 | pandas_133 = Version(pd.__version__) == Version("1.3.3") | |
29 | mask_variants_single_rectangle = [ | |
30 | "single_rectangle_gdf", | |
31 | "single_rectangle_gdf_list_bounds", | |
32 | "single_rectangle_gdf_tuple_bounds", | |
33 | "single_rectangle_gdf_array_bounds", | |
34 | ] | |
35 | mask_variants_large_rectangle = [ | |
36 | "larger_single_rectangle_gdf", | |
37 | "larger_single_rectangle_gdf_bounds", | |
38 | ] | |
27 | 39 | |
28 | 40 | |
29 | 41 | @pytest.fixture |
59 | 71 | gdf = GeoDataFrame([1], geometry=[poly_inters], crs="EPSG:3857") |
60 | 72 | gdf["attr2"] = "site-boundary" |
61 | 73 | return gdf |
74 | ||
75 | ||
76 | @pytest.fixture | |
77 | def single_rectangle_gdf_tuple_bounds(single_rectangle_gdf): | |
78 | """Bounds of the created single rectangle""" | |
79 | return tuple(single_rectangle_gdf.total_bounds) | |
80 | ||
81 | ||
82 | @pytest.fixture | |
83 | def single_rectangle_gdf_list_bounds(single_rectangle_gdf): | |
84 | """Bounds of the created single rectangle""" | |
85 | return list(single_rectangle_gdf.total_bounds) | |
86 | ||
87 | ||
88 | @pytest.fixture | |
89 | def single_rectangle_gdf_array_bounds(single_rectangle_gdf): | |
90 | """Bounds of the created single rectangle""" | |
91 | return single_rectangle_gdf.total_bounds | |
62 | 92 | |
63 | 93 | |
64 | 94 | @pytest.fixture |
72 | 102 | gdf = GeoDataFrame([1], geometry=[poly_inters], crs="EPSG:3857") |
73 | 103 | gdf["attr2"] = ["study area"] |
74 | 104 | return gdf |
105 | ||
106 | ||
107 | @pytest.fixture | |
108 | def larger_single_rectangle_gdf_bounds(larger_single_rectangle_gdf): | |
109 | """Bounds of the created single rectangle""" | |
110 | return tuple(larger_single_rectangle_gdf.total_bounds) | |
75 | 111 | |
76 | 112 | |
77 | 113 | @pytest.fixture |
173 | 209 | with pytest.raises(TypeError): |
174 | 210 | clip((2, 3), single_rectangle_gdf) |
175 | 211 | with pytest.raises(TypeError): |
176 | clip(single_rectangle_gdf, (2, 3)) | |
177 | ||
178 | ||
179 | def test_returns_gdf(point_gdf, single_rectangle_gdf): | |
180 | """Test that function returns a GeoDataFrame (or GDF-like) object.""" | |
181 | out = clip(point_gdf, single_rectangle_gdf) | |
182 | assert isinstance(out, GeoDataFrame) | |
183 | ||
184 | ||
185 | def test_returns_series(point_gdf, single_rectangle_gdf): | |
186 | """Test that function returns a GeoSeries if GeoSeries is passed.""" | |
187 | out = clip(point_gdf.geometry, single_rectangle_gdf) | |
188 | assert isinstance(out, GeoSeries) | |
212 | clip(single_rectangle_gdf, "foobar") | |
213 | with pytest.raises(TypeError): | |
214 | clip(single_rectangle_gdf, (1, 2, 3)) | |
215 | with pytest.raises(TypeError): | |
216 | clip(single_rectangle_gdf, (1, 2, 3, 4, 5)) | |
189 | 217 | |
190 | 218 | |
191 | 219 | def test_non_overlapping_geoms(): |
202 | 230 | assert_geoseries_equal(out2, GeoSeries(crs=unit_gdf.crs)) |
203 | 231 | |
204 | 232 | |
205 | def test_clip_points(point_gdf, single_rectangle_gdf): | |
206 | """Test clipping a points GDF with a generic polygon geometry.""" | |
207 | clip_pts = clip(point_gdf, single_rectangle_gdf) | |
208 | pts = np.array([[2, 2], [3, 4], [9, 8]]) | |
209 | exp = GeoDataFrame([Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:3857") | |
210 | assert_geodataframe_equal(clip_pts, exp) | |
211 | ||
212 | ||
213 | def test_clip_points_geom_col_rename(point_gdf, single_rectangle_gdf): | |
214 | """Test clipping a points GDF with a generic polygon geometry.""" | |
215 | point_gdf_geom_col_rename = point_gdf.rename_geometry("geometry2") | |
216 | clip_pts = clip(point_gdf_geom_col_rename, single_rectangle_gdf) | |
217 | pts = np.array([[2, 2], [3, 4], [9, 8]]) | |
218 | exp = GeoDataFrame( | |
219 | [Point(xy) for xy in pts], | |
220 | columns=["geometry2"], | |
221 | crs="EPSG:3857", | |
222 | geometry="geometry2", | |
223 | ) | |
224 | assert_geodataframe_equal(clip_pts, exp) | |
225 | ||
226 | ||
227 | def test_clip_poly(buffered_locations, single_rectangle_gdf): | |
228 | """Test clipping a polygon GDF with a generic polygon geometry.""" | |
229 | clipped_poly = clip(buffered_locations, single_rectangle_gdf) | |
230 | assert len(clipped_poly.geometry) == 3 | |
231 | assert all(clipped_poly.geom_type == "Polygon") | |
232 | ||
233 | ||
234 | def test_clip_poly_geom_col_rename(buffered_locations, single_rectangle_gdf): | |
235 | """Test clipping a polygon GDF with a generic polygon geometry.""" | |
236 | ||
237 | poly_gdf_geom_col_rename = buffered_locations.rename_geometry("geometry2") | |
238 | clipped_poly = clip(poly_gdf_geom_col_rename, single_rectangle_gdf) | |
239 | assert len(clipped_poly.geometry) == 3 | |
240 | assert "geometry" not in clipped_poly.keys() | |
241 | assert "geometry2" in clipped_poly.keys() | |
242 | ||
243 | ||
244 | def test_clip_poly_series(buffered_locations, single_rectangle_gdf): | |
245 | """Test clipping a polygon GDF with a generic polygon geometry.""" | |
246 | clipped_poly = clip(buffered_locations.geometry, single_rectangle_gdf) | |
247 | assert len(clipped_poly) == 3 | |
248 | assert all(clipped_poly.geom_type == "Polygon") | |
233 | @pytest.mark.parametrize("mask_fixture_name", mask_variants_single_rectangle) | |
234 | class TestClipWithSingleRectangleGdf: | |
235 | @pytest.fixture | |
236 | def mask(self, mask_fixture_name, request): | |
237 | return request.getfixturevalue(mask_fixture_name) | |
238 | ||
239 | def test_returns_gdf(self, point_gdf, mask): | |
240 | """Test that function returns a GeoDataFrame (or GDF-like) object.""" | |
241 | out = clip(point_gdf, mask) | |
242 | assert isinstance(out, GeoDataFrame) | |
243 | ||
244 | def test_returns_series(self, point_gdf, mask): | |
245 | """Test that function returns a GeoSeries if GeoSeries is passed.""" | |
246 | out = clip(point_gdf.geometry, mask) | |
247 | assert isinstance(out, GeoSeries) | |
248 | ||
249 | def test_clip_points(self, point_gdf, mask): | |
250 | """Test clipping a points GDF with a generic polygon geometry.""" | |
251 | clip_pts = clip(point_gdf, mask) | |
252 | pts = np.array([[2, 2], [3, 4], [9, 8]]) | |
253 | exp = GeoDataFrame( | |
254 | [Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:3857" | |
255 | ) | |
256 | assert_geodataframe_equal(clip_pts, exp) | |
257 | ||
258 | def test_clip_points_geom_col_rename(self, point_gdf, mask): | |
259 | """Test clipping a points GDF with a generic polygon geometry.""" | |
260 | point_gdf_geom_col_rename = point_gdf.rename_geometry("geometry2") | |
261 | clip_pts = clip(point_gdf_geom_col_rename, mask) | |
262 | pts = np.array([[2, 2], [3, 4], [9, 8]]) | |
263 | exp = GeoDataFrame( | |
264 | [Point(xy) for xy in pts], | |
265 | columns=["geometry2"], | |
266 | crs="EPSG:3857", | |
267 | geometry="geometry2", | |
268 | ) | |
269 | assert_geodataframe_equal(clip_pts, exp) | |
270 | ||
271 | def test_clip_poly(self, buffered_locations, mask): | |
272 | """Test clipping a polygon GDF with a generic polygon geometry.""" | |
273 | clipped_poly = clip(buffered_locations, mask) | |
274 | assert len(clipped_poly.geometry) == 3 | |
275 | assert all(clipped_poly.geom_type == "Polygon") | |
276 | ||
277 | def test_clip_poly_geom_col_rename(self, buffered_locations, mask): | |
278 | """Test clipping a polygon GDF with a generic polygon geometry.""" | |
279 | ||
280 | poly_gdf_geom_col_rename = buffered_locations.rename_geometry("geometry2") | |
281 | clipped_poly = clip(poly_gdf_geom_col_rename, mask) | |
282 | assert len(clipped_poly.geometry) == 3 | |
283 | assert "geometry" not in clipped_poly.keys() | |
284 | assert "geometry2" in clipped_poly.keys() | |
285 | ||
286 | def test_clip_poly_series(self, buffered_locations, mask): | |
287 | """Test clipping a polygon GDF with a generic polygon geometry.""" | |
288 | clipped_poly = clip(buffered_locations.geometry, mask) | |
289 | assert len(clipped_poly) == 3 | |
290 | assert all(clipped_poly.geom_type == "Polygon") | |
291 | ||
292 | @pytest.mark.xfail(pandas_133, reason="Regression in pandas 1.3.3 (GH #2101)") | |
293 | def test_clip_multipoly_keep_geom_type(self, multi_poly_gdf, mask): | |
294 | """Test a multi poly object where the return includes a sliver. | |
295 | Also the bounds of the object should == the bounds of the clip object | |
296 | if they fully overlap (as they do in these fixtures).""" | |
297 | clipped = clip(multi_poly_gdf, mask, keep_geom_type=True) | |
298 | expected_bounds = ( | |
299 | mask if _mask_is_list_like_rectangle(mask) else mask.total_bounds | |
300 | ) | |
301 | assert np.array_equal(clipped.total_bounds, expected_bounds) | |
302 | # Assert returned data is a not geometry collection | |
303 | assert (clipped.geom_type.isin(["Polygon", "MultiPolygon"])).all() | |
304 | ||
305 | def test_clip_multiline(self, multi_line, mask): | |
306 | """Test that clipping a multiline feature with a poly returns expected | |
307 | output.""" | |
308 | clipped = clip(multi_line, mask) | |
309 | assert clipped.geom_type[0] == "MultiLineString" | |
310 | ||
311 | def test_clip_multipoint(self, multi_point, mask): | |
312 | """Clipping a multipoint feature with a polygon works as expected. | |
313 | should return a geodataframe with a single multi point feature""" | |
314 | clipped = clip(multi_point, mask) | |
315 | assert clipped.geom_type[0] == "MultiPoint" | |
316 | assert hasattr(clipped, "attr") | |
317 | # All points should intersect the clip geom | |
318 | assert len(clipped) == 2 | |
319 | clipped_mutltipoint = MultiPoint( | |
320 | [ | |
321 | Point(2, 2), | |
322 | Point(3, 4), | |
323 | Point(9, 8), | |
324 | ] | |
325 | ) | |
326 | assert clipped.iloc[0].geometry.wkt == clipped_mutltipoint.wkt | |
327 | shape_for_points = ( | |
328 | box(*mask) if _mask_is_list_like_rectangle(mask) else mask.unary_union | |
329 | ) | |
330 | assert all(clipped.intersects(shape_for_points)) | |
331 | ||
332 | def test_clip_lines(self, two_line_gdf, mask): | |
333 | """Test what happens when you give the clip_extent a line GDF.""" | |
334 | clip_line = clip(two_line_gdf, mask) | |
335 | assert len(clip_line.geometry) == 2 | |
336 | ||
337 | def test_mixed_geom(self, mixed_gdf, mask): | |
338 | """Test clipping a mixed GeoDataFrame""" | |
339 | clipped = clip(mixed_gdf, mask) | |
340 | assert ( | |
341 | clipped.geom_type[0] == "Point" | |
342 | and clipped.geom_type[1] == "Polygon" | |
343 | and clipped.geom_type[2] == "LineString" | |
344 | ) | |
345 | ||
346 | def test_mixed_series(self, mixed_gdf, mask): | |
347 | """Test clipping a mixed GeoSeries""" | |
348 | clipped = clip(mixed_gdf.geometry, mask) | |
349 | assert ( | |
350 | clipped.geom_type[0] == "Point" | |
351 | and clipped.geom_type[1] == "Polygon" | |
352 | and clipped.geom_type[2] == "LineString" | |
353 | ) | |
354 | ||
355 | def test_clip_warning_no_extra_geoms(self, buffered_locations, mask): | |
356 | """Test a user warning is provided if no new geometry types are found.""" | |
357 | with pytest.warns(UserWarning): | |
358 | clip(buffered_locations, mask, True) | |
359 | warnings.warn( | |
360 | "keep_geom_type was called when no extra geometry types existed.", | |
361 | UserWarning, | |
362 | ) | |
363 | ||
364 | def test_clip_with_line_extra_geom(self, sliver_line, mask): | |
365 | """When the output of a clipped line returns a geom collection, | |
366 | and keep_geom_type is True, no geometry collections should be returned.""" | |
367 | clipped = clip(sliver_line, mask, keep_geom_type=True) | |
368 | assert len(clipped.geometry) == 1 | |
369 | # Assert returned data is a not geometry collection | |
370 | assert not (clipped.geom_type == "GeometryCollection").any() | |
371 | ||
372 | def test_clip_no_box_overlap(self, pointsoutside_nooverlap_gdf, mask): | |
373 | """Test clip when intersection is empty and boxes do not overlap.""" | |
374 | clipped = clip(pointsoutside_nooverlap_gdf, mask) | |
375 | assert len(clipped) == 0 | |
376 | ||
377 | def test_clip_box_overlap(self, pointsoutside_overlap_gdf, mask): | |
378 | """Test clip when intersection is empty and boxes do overlap.""" | |
379 | clipped = clip(pointsoutside_overlap_gdf, mask) | |
380 | assert len(clipped) == 0 | |
381 | ||
382 | def test_warning_extra_geoms_mixed(self, mixed_gdf, mask): | |
383 | """Test the correct warnings are raised if keep_geom_type is | |
384 | called on a mixed GDF""" | |
385 | with pytest.warns(UserWarning): | |
386 | clip(mixed_gdf, mask, keep_geom_type=True) | |
387 | ||
388 | def test_warning_geomcoll(self, geomcol_gdf, mask): | |
389 | """Test the correct warnings are raised if keep_geom_type is | |
390 | called on a GDF with GeometryCollection""" | |
391 | with pytest.warns(UserWarning): | |
392 | clip(geomcol_gdf, mask, keep_geom_type=True) | |
393 | ||
394 | ||
395 | def test_clip_line_keep_slivers(sliver_line, single_rectangle_gdf): | |
396 | """Test the correct output if a point is returned | |
397 | from a line only geometry type.""" | |
398 | clipped = clip(sliver_line, single_rectangle_gdf) | |
399 | # Assert returned data is a geometry collection given sliver geoms | |
400 | assert "Point" == clipped.geom_type[0] | |
401 | assert "LineString" == clipped.geom_type[1] | |
249 | 402 | |
250 | 403 | |
251 | 404 | @pytest.mark.xfail(pandas_133, reason="Regression in pandas 1.3.3 (GH #2101)") |
259 | 412 | assert "GeometryCollection" in clipped.geom_type[0] |
260 | 413 | |
261 | 414 | |
262 | @pytest.mark.xfail(pandas_133, reason="Regression in pandas 1.3.3 (GH #2101)") | |
263 | def test_clip_multipoly_keep_geom_type(multi_poly_gdf, single_rectangle_gdf): | |
264 | """Test a multi poly object where the return includes a sliver. | |
265 | Also the bounds of the object should == the bounds of the clip object | |
266 | if they fully overlap (as they do in these fixtures).""" | |
267 | clipped = clip(multi_poly_gdf, single_rectangle_gdf, keep_geom_type=True) | |
268 | assert np.array_equal(clipped.total_bounds, single_rectangle_gdf.total_bounds) | |
269 | # Assert returned data is a not geometry collection | |
270 | assert (clipped.geom_type == "Polygon").any() | |
271 | ||
272 | ||
273 | def test_clip_single_multipoly_no_extra_geoms( | |
274 | buffered_locations, larger_single_rectangle_gdf | |
275 | ): | |
276 | """When clipping a multi-polygon feature, no additional geom types | |
277 | should be returned.""" | |
278 | multi = buffered_locations.dissolve(by="type").reset_index() | |
279 | clipped = clip(multi, larger_single_rectangle_gdf) | |
280 | assert clipped.geom_type[0] == "Polygon" | |
281 | ||
282 | ||
283 | def test_clip_multiline(multi_line, single_rectangle_gdf): | |
284 | """Test that clipping a multiline feature with a poly returns expected output.""" | |
285 | clipped = clip(multi_line, single_rectangle_gdf) | |
286 | assert clipped.geom_type[0] == "MultiLineString" | |
287 | ||
288 | ||
289 | def test_clip_multipoint(single_rectangle_gdf, multi_point): | |
290 | """Clipping a multipoint feature with a polygon works as expected. | |
291 | should return a geodataframe with a single multi point feature""" | |
292 | clipped = clip(multi_point, single_rectangle_gdf) | |
293 | assert clipped.geom_type[0] == "MultiPoint" | |
294 | assert hasattr(clipped, "attr") | |
295 | # All points should intersect the clip geom | |
296 | assert len(clipped) == 2 | |
297 | clipped_mutltipoint = MultiPoint( | |
298 | [ | |
299 | Point(2, 2), | |
300 | Point(3, 4), | |
301 | Point(9, 8), | |
302 | ] | |
303 | ) | |
304 | assert clipped.iloc[0].geometry.wkt == clipped_mutltipoint.wkt | |
305 | assert all(clipped.intersects(single_rectangle_gdf.unary_union)) | |
306 | ||
307 | ||
308 | def test_clip_lines(two_line_gdf, single_rectangle_gdf): | |
309 | """Test what happens when you give the clip_extent a line GDF.""" | |
310 | clip_line = clip(two_line_gdf, single_rectangle_gdf) | |
311 | assert len(clip_line.geometry) == 2 | |
312 | ||
313 | ||
314 | def test_clip_with_multipolygon(buffered_locations, single_rectangle_gdf): | |
315 | """Test clipping a polygon with a multipolygon.""" | |
316 | multi = buffered_locations.dissolve(by="type").reset_index() | |
317 | clipped = clip(single_rectangle_gdf, multi) | |
318 | assert clipped.geom_type[0] == "Polygon" | |
319 | ||
320 | ||
321 | def test_mixed_geom(mixed_gdf, single_rectangle_gdf): | |
322 | """Test clipping a mixed GeoDataFrame""" | |
323 | clipped = clip(mixed_gdf, single_rectangle_gdf) | |
324 | assert ( | |
325 | clipped.geom_type[0] == "Point" | |
326 | and clipped.geom_type[1] == "Polygon" | |
327 | and clipped.geom_type[2] == "LineString" | |
328 | ) | |
329 | ||
330 | ||
331 | def test_mixed_series(mixed_gdf, single_rectangle_gdf): | |
332 | """Test clipping a mixed GeoSeries""" | |
333 | clipped = clip(mixed_gdf.geometry, single_rectangle_gdf) | |
334 | assert ( | |
335 | clipped.geom_type[0] == "Point" | |
336 | and clipped.geom_type[1] == "Polygon" | |
337 | and clipped.geom_type[2] == "LineString" | |
338 | ) | |
339 | ||
340 | ||
341 | def test_clip_warning_no_extra_geoms(buffered_locations, single_rectangle_gdf): | |
342 | """Test a user warning is provided if no new geometry types are found.""" | |
343 | with pytest.warns(UserWarning): | |
344 | clip(buffered_locations, single_rectangle_gdf, True) | |
345 | warnings.warn( | |
346 | "keep_geom_type was called when no extra geometry types existed.", | |
347 | UserWarning, | |
348 | ) | |
415 | def test_warning_crs_mismatch(point_gdf, single_rectangle_gdf): | |
416 | with pytest.warns(UserWarning, match="CRS mismatch between the CRS"): | |
417 | clip(point_gdf, single_rectangle_gdf.to_crs(4326)) | |
349 | 418 | |
350 | 419 | |
351 | 420 | def test_clip_with_polygon(single_rectangle_gdf): |
360 | 429 | assert_geodataframe_equal(clipped, exp) |
361 | 430 | |
362 | 431 | |
363 | def test_clip_with_line_extra_geom(single_rectangle_gdf, sliver_line): | |
364 | """When the output of a clipped line returns a geom collection, | |
365 | and keep_geom_type is True, no geometry collections should be returned.""" | |
366 | clipped = clip(sliver_line, single_rectangle_gdf, keep_geom_type=True) | |
367 | assert len(clipped.geometry) == 1 | |
368 | # Assert returned data is a not geometry collection | |
369 | assert not (clipped.geom_type == "GeometryCollection").any() | |
370 | ||
371 | ||
372 | def test_clip_line_keep_slivers(single_rectangle_gdf, sliver_line): | |
373 | """Test the correct output if a point is returned | |
374 | from a line only geometry type.""" | |
375 | clipped = clip(sliver_line, single_rectangle_gdf) | |
376 | # Assert returned data is a geometry collection given sliver geoms | |
377 | assert "Point" == clipped.geom_type[0] | |
378 | assert "LineString" == clipped.geom_type[1] | |
379 | ||
380 | ||
381 | def test_clip_no_box_overlap(pointsoutside_nooverlap_gdf, single_rectangle_gdf): | |
382 | """Test clip when intersection is empty and boxes do not overlap.""" | |
383 | clipped = clip(pointsoutside_nooverlap_gdf, single_rectangle_gdf) | |
384 | assert len(clipped) == 0 | |
385 | ||
386 | ||
387 | def test_clip_box_overlap(pointsoutside_overlap_gdf, single_rectangle_gdf): | |
388 | """Test clip when intersection is empty and boxes do overlap.""" | |
389 | clipped = clip(pointsoutside_overlap_gdf, single_rectangle_gdf) | |
390 | assert len(clipped) == 0 | |
391 | ||
392 | ||
393 | def test_warning_extra_geoms_mixed(single_rectangle_gdf, mixed_gdf): | |
394 | """Test the correct warnings are raised if keep_geom_type is | |
395 | called on a mixed GDF""" | |
396 | with pytest.warns(UserWarning): | |
397 | clip(mixed_gdf, single_rectangle_gdf, keep_geom_type=True) | |
398 | ||
399 | ||
400 | def test_warning_geomcoll(single_rectangle_gdf, geomcol_gdf): | |
401 | """Test the correct warnings are raised if keep_geom_type is | |
402 | called on a GDF with GeometryCollection""" | |
403 | with pytest.warns(UserWarning): | |
404 | clip(geomcol_gdf, single_rectangle_gdf, keep_geom_type=True) | |
405 | ||
406 | ||
407 | def test_warning_crs_mismatch(point_gdf, single_rectangle_gdf): | |
408 | with pytest.warns(UserWarning, match="CRS mismatch between the CRS"): | |
409 | clip(point_gdf, single_rectangle_gdf.to_crs(4326)) | |
432 | def test_clip_with_multipolygon(buffered_locations, single_rectangle_gdf): | |
433 | """Test clipping a polygon with a multipolygon.""" | |
434 | multi = buffered_locations.dissolve(by="type").reset_index() | |
435 | clipped = clip(single_rectangle_gdf, multi) | |
436 | assert clipped.geom_type[0] == "Polygon" | |
437 | ||
438 | ||
439 | @pytest.mark.parametrize( | |
440 | "mask_fixture_name", | |
441 | mask_variants_large_rectangle, | |
442 | ) | |
443 | def test_clip_single_multipoly_no_extra_geoms( | |
444 | buffered_locations, mask_fixture_name, request | |
445 | ): | |
446 | """When clipping a multi-polygon feature, no additional geom types | |
447 | should be returned.""" | |
448 | masks = request.getfixturevalue(mask_fixture_name) | |
449 | multi = buffered_locations.dissolve(by="type").reset_index() | |
450 | clipped = clip(multi, masks) | |
451 | assert clipped.geom_type[0] == "Polygon" |
0 | from distutils.version import LooseVersion | |
0 | from packaging.version import Version | |
1 | 1 | import math |
2 | 2 | from typing import Sequence |
3 | 3 | from geopandas.testing import assert_geodataframe_equal |
136 | 136 | if op != predicate: |
137 | 137 | warntype = UserWarning |
138 | 138 | match = ( |
139 | "`predicate` will be overriden by the value of `op`" | |
139 | "`predicate` will be overridden by the value of `op`" | |
140 | 140 | + r"(.|\s)*" |
141 | 141 | + match |
142 | 142 | ) |
353 | 353 | exp.index.names = df2.index.names |
354 | 354 | |
355 | 355 | # GH 1364 fix of behaviour was done in pandas 1.1.0 |
356 | if predicate == "within" and str(pd.__version__) >= LooseVersion("1.1.0"): | |
356 | if predicate == "within" and Version(pd.__version__) >= Version("1.1.0"): | |
357 | 357 | exp = exp.sort_index() |
358 | 358 | |
359 | 359 | assert_frame_equal(res, exp, check_index_type=False) |
766 | 766 | [Point(1, 1), Point(0.25, 1)], |
767 | 767 | [0, 1], |
768 | 768 | [1, 0], |
769 | [math.sqrt(0.25 ** 2 + 1), 0], | |
769 | [math.sqrt(0.25**2 + 1), 0], | |
770 | 770 | ), |
771 | 771 | ( |
772 | 772 | [Point(0, 0), Point(1, 1)], |
773 | 773 | [Point(-10, -10), Point(100, 100)], |
774 | 774 | [0, 1], |
775 | 775 | [0, 0], |
776 | [math.sqrt(10 ** 2 + 10 ** 2), math.sqrt(11 ** 2 + 11 ** 2)], | |
776 | [math.sqrt(10**2 + 10**2), math.sqrt(11**2 + 11**2)], | |
777 | 777 | ), |
778 | 778 | ( |
779 | 779 | [Point(0, 0), Point(1, 1)], |
787 | 787 | [Point(1.1, 1.1), Point(0, 0)], |
788 | 788 | [0, 1, 2], |
789 | 789 | [1, 0, 1], |
790 | [0, np.sqrt(0.1 ** 2 + 0.1 ** 2), 0], | |
790 | [0, np.sqrt(0.1**2 + 0.1**2), 0], | |
791 | 791 | ), |
792 | 792 | ], |
793 | 793 | ) |
851 | 851 | [Point(-10, -10), Point(100, 100)], |
852 | 852 | [0, 1], |
853 | 853 | [0, 1], |
854 | [math.sqrt(10 ** 2 + 10 ** 2), math.sqrt(99 ** 2 + 99 ** 2)], | |
854 | [math.sqrt(10**2 + 10**2), math.sqrt(99**2 + 99**2)], | |
855 | 855 | ), |
856 | 856 | ( |
857 | 857 | [Point(0, 0), Point(1, 1)], |
858 | 858 | [Point(x, y) for x, y in zip(np.arange(10), np.arange(10))], |
859 | 859 | [0, 1] + [1] * 8, |
860 | 860 | list(range(10)), |
861 | [0, 0] + [np.sqrt(x ** 2 + x ** 2) for x in np.arange(1, 9)], | |
861 | [0, 0] + [np.sqrt(x**2 + x**2) for x in np.arange(1, 9)], | |
862 | 862 | ), |
863 | 863 | ( |
864 | 864 | [Point(0, 0), Point(1, 1), Point(0, 0)], |
865 | 865 | [Point(1.1, 1.1), Point(0, 0)], |
866 | 866 | [1, 0, 2], |
867 | 867 | [0, 1, 1], |
868 | [np.sqrt(0.1 ** 2 + 0.1 ** 2), 0, 0], | |
868 | [np.sqrt(0.1**2 + 0.1**2), 0, 0], | |
869 | 869 | ), |
870 | 870 | ], |
871 | 871 | ) |
0 | from distutils.version import LooseVersion | |
1 | ||
2 | 0 | from shapely.geometry import LineString, MultiPoint, Point |
3 | import pyproj | |
4 | from pyproj import CRS | |
5 | 1 | |
6 | 2 | from geopandas import GeoSeries |
7 | 3 | from geopandas.tools import collect |
8 | from geopandas.tools.crs import epsg_from_crs, explicit_crs_from_epsg | |
9 | 4 | |
10 | 5 | import pytest |
11 | ||
12 | ||
13 | # pyproj 2.3.1 fixed a segfault for the case working in an environment with | |
14 | # 'init' dicts (https://github.com/pyproj4/pyproj/issues/415) | |
15 | PYPROJ_LT_231 = LooseVersion(pyproj.__version__) < LooseVersion("2.3.1") | |
16 | 6 | |
17 | 7 | |
18 | 8 | class TestTools: |
58 | 48 | def test_collect_mixed_multi(self): |
59 | 49 | with pytest.raises(ValueError): |
60 | 50 | collect([self.mpc, self.mp1]) |
61 | ||
62 | @pytest.mark.skipif(PYPROJ_LT_231, reason="segfault") | |
63 | def test_epsg_from_crs(self): | |
64 | with pytest.warns(FutureWarning): | |
65 | assert epsg_from_crs({"init": "epsg:4326"}) == 4326 | |
66 | assert epsg_from_crs({"init": "EPSG:4326"}) == 4326 | |
67 | assert epsg_from_crs("+init=epsg:4326") == 4326 | |
68 | ||
69 | @pytest.mark.skipif(PYPROJ_LT_231, reason="segfault") | |
70 | def test_explicit_crs_from_epsg(self): | |
71 | with pytest.warns(FutureWarning): | |
72 | assert explicit_crs_from_epsg(epsg=4326) == CRS.from_epsg(4326) | |
73 | assert explicit_crs_from_epsg(epsg="4326") == CRS.from_epsg(4326) | |
74 | assert explicit_crs_from_epsg(crs={"init": "epsg:4326"}) == CRS.from_dict( | |
75 | {"init": "epsg:4326"} | |
76 | ) | |
77 | assert explicit_crs_from_epsg(crs="+init=epsg:4326") == CRS.from_proj4( | |
78 | "+init=epsg:4326" | |
79 | ) | |
80 | ||
81 | @pytest.mark.filterwarnings("ignore:explicit_crs_from_epsg:FutureWarning") | |
82 | def test_explicit_crs_from_epsg__missing_input(self): | |
83 | with pytest.raises(ValueError): | |
84 | explicit_crs_from_epsg() |
0 | 0 | version: 2 |
1 | build: | |
2 | os: ubuntu-20.04 | |
3 | tools: | |
4 | python: mambaforge-4.10 | |
5 | python: | |
6 | install: | |
7 | - method: pip | |
8 | path: . | |
9 | conda: | |
10 | environment: doc/environment.yml | |
1 | 11 | formats: [] |
2 | conda: | |
3 | environment: doc/environment.yml | |
4 | python: | |
5 | version: 3 | |
6 | install: | |
7 | - method: pip | |
8 | path: . |
0 | 0 | # required |
1 | 1 | fiona>=1.8 |
2 | pandas>=0.25 | |
3 | pyproj>=2.2.0 | |
4 | shapely>=1.6 | |
2 | pandas>=1.0.0 | |
3 | pyproj>=2.6.1.post1 | |
4 | shapely>=1.7 | |
5 | packaging | |
5 | 6 | |
6 | 7 | # geodatabase access |
7 | psycopg2>=2.5.1 | |
8 | SQLAlchemy>=0.8.3 | |
8 | psycopg2>=2.8.0 | |
9 | SQLAlchemy>=1.3 | |
9 | 10 | |
10 | 11 | # geocoding |
11 | 12 | geopy |
12 | 13 | |
13 | 14 | # plotting |
14 | matplotlib>=2.2 | |
15 | matplotlib>=3.2 | |
15 | 16 | mapclassify |
16 | 17 | |
17 | 18 | # testing |
20 | 21 | codecov |
21 | 22 | |
22 | 23 | # spatial access methods |
23 | rtree>=0.8 | |
24 | rtree>=0.9 | |
24 | 25 | |
25 | 26 | # styling |
26 | 27 | black |
0 | [bdist_wheel] | |
1 | universal = 1 | |
2 | ||
3 | 0 | # See the docstring in versioneer.py for instructions. Note that you must |
4 | 1 | # re-run 'versioneer.py setup' after changing this section, and commit the |
5 | 2 | # resulting files. |
3 | 3 | """ |
4 | 4 | |
5 | 5 | import os |
6 | import sys | |
6 | 7 | |
7 | try: | |
8 | from setuptools import setup | |
9 | except ImportError: | |
10 | from distutils.core import setup | |
8 | from setuptools import setup | |
11 | 9 | |
12 | import versioneer | |
10 | # ensure the current directory is on sys.path so versioneer can be imported | |
11 | # when pip uses PEP 517/518 build rules. | |
12 | # https://github.com/python-versioneer/python-versioneer/issues/193 | |
13 | sys.path.append(os.path.dirname(__file__)) | |
14 | ||
15 | import versioneer # noqa: E402 | |
13 | 16 | |
14 | 17 | LONG_DESCRIPTION = """GeoPandas is a project to add support for geographic data to |
15 | 18 | `pandas`_ objects. |
29 | 32 | INSTALL_REQUIRES = [] |
30 | 33 | else: |
31 | 34 | INSTALL_REQUIRES = [ |
32 | "pandas >= 0.25.0", | |
33 | "shapely >= 1.6", | |
35 | "pandas >= 1.0.0", | |
36 | "shapely >= 1.7, < 2", | |
34 | 37 | "fiona >= 1.8", |
35 | "pyproj >= 2.2.0", | |
38 | "pyproj >= 2.6.1.post1", | |
39 | "packaging", | |
36 | 40 | ] |
37 | 41 | |
38 | 42 | # get all data dirs in the datasets module |
56 | 60 | author="GeoPandas contributors", |
57 | 61 | author_email="kjordahl@alum.mit.edu", |
58 | 62 | url="http://geopandas.org", |
63 | project_urls={ | |
64 | "Source": "https://github.com/geopandas/geopandas", | |
65 | }, | |
59 | 66 | long_description=LONG_DESCRIPTION, |
67 | long_description_content_type="text/x-rst", | |
60 | 68 | packages=[ |
61 | 69 | "geopandas", |
62 | 70 | "geopandas.io", |
66 | 74 | "geopandas.tools.tests", |
67 | 75 | ], |
68 | 76 | package_data={"geopandas": data_files}, |
69 | python_requires=">=3.7", | |
77 | python_requires=">=3.8", | |
70 | 78 | install_requires=INSTALL_REQUIRES, |
71 | 79 | cmdclass=versioneer.get_cmdclass(), |
72 | 80 | ) |
0 | ||
1 | # Version: 0.16 | |
0 | # Version: 0.21 | |
2 | 1 | |
3 | 2 | """The Versioneer - like a rocketeer, but for versions. |
4 | 3 | |
6 | 5 | ============== |
7 | 6 | |
8 | 7 | * like a rocketeer, but for versions! |
9 | * https://github.com/warner/python-versioneer | |
8 | * https://github.com/python-versioneer/python-versioneer | |
10 | 9 | * Brian Warner |
11 | 10 | * License: Public Domain |
12 | * Compatible With: python2.6, 2.7, 3.3, 3.4, 3.5, and pypy | |
13 | * [![Latest Version] | |
14 | (https://pypip.in/version/versioneer/badge.svg?style=flat) | |
15 | ](https://pypi.python.org/pypi/versioneer/) | |
16 | * [![Build Status] | |
17 | (https://travis-ci.org/warner/python-versioneer.png?branch=master) | |
18 | ](https://travis-ci.org/warner/python-versioneer) | |
11 | * Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3 | |
12 | * [![Latest Version][pypi-image]][pypi-url] | |
13 | * [![Build Status][travis-image]][travis-url] | |
19 | 14 | |
20 | 15 | This is a tool for managing a recorded version number in distutils-based |
21 | 16 | python projects. The goal is to remove the tedious and error-prone "update |
26 | 21 | |
27 | 22 | ## Quick Install |
28 | 23 | |
29 | * `pip install versioneer` to somewhere to your $PATH | |
30 | * add a `[versioneer]` section to your setup.cfg (see below) | |
24 | * `pip install versioneer` to somewhere in your $PATH | |
25 | * add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) | |
31 | 26 | * run `versioneer install` in your source tree, commit the results |
27 | * Verify version information with `python setup.py version` | |
32 | 28 | |
33 | 29 | ## Version Identifiers |
34 | 30 | |
60 | 56 | for example `git describe --tags --dirty --always` reports things like |
61 | 57 | "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the |
62 | 58 | 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has |
63 | uncommitted changes. | |
59 | uncommitted changes). | |
64 | 60 | |
65 | 61 | The version identifier is used for multiple purposes: |
66 | 62 | |
87 | 83 | |
88 | 84 | ## Installation |
89 | 85 | |
90 | First, decide on values for the following configuration variables: | |
91 | ||
92 | * `VCS`: the version control system you use. Currently accepts "git". | |
93 | ||
94 | * `style`: the style of version string to be produced. See "Styles" below for | |
95 | details. Defaults to "pep440", which looks like | |
96 | `TAG[+DISTANCE.gSHORTHASH[.dirty]]`. | |
97 | ||
98 | * `versionfile_source`: | |
99 | ||
100 | A project-relative pathname into which the generated version strings should | |
101 | be written. This is usually a `_version.py` next to your project's main | |
102 | `__init__.py` file, so it can be imported at runtime. If your project uses | |
103 | `src/myproject/__init__.py`, this should be `src/myproject/_version.py`. | |
104 | This file should be checked in to your VCS as usual: the copy created below | |
105 | by `setup.py setup_versioneer` will include code that parses expanded VCS | |
106 | keywords in generated tarballs. The 'build' and 'sdist' commands will | |
107 | replace it with a copy that has just the calculated version string. | |
108 | ||
109 | This must be set even if your project does not have any modules (and will | |
110 | therefore never import `_version.py`), since "setup.py sdist" -based trees | |
111 | still need somewhere to record the pre-calculated version strings. Anywhere | |
112 | in the source tree should do. If there is a `__init__.py` next to your | |
113 | `_version.py`, the `setup.py setup_versioneer` command (described below) | |
114 | will append some `__version__`-setting assignments, if they aren't already | |
115 | present. | |
116 | ||
117 | * `versionfile_build`: | |
118 | ||
119 | Like `versionfile_source`, but relative to the build directory instead of | |
120 | the source directory. These will differ when your setup.py uses | |
121 | 'package_dir='. If you have `package_dir={'myproject': 'src/myproject'}`, | |
122 | then you will probably have `versionfile_build='myproject/_version.py'` and | |
123 | `versionfile_source='src/myproject/_version.py'`. | |
124 | ||
125 | If this is set to None, then `setup.py build` will not attempt to rewrite | |
126 | any `_version.py` in the built tree. If your project does not have any | |
127 | libraries (e.g. if it only builds a script), then you should use | |
128 | `versionfile_build = None`. To actually use the computed version string, | |
129 | your `setup.py` will need to override `distutils.command.build_scripts` | |
130 | with a subclass that explicitly inserts a copy of | |
131 | `versioneer.get_version()` into your script file. See | |
132 | `test/demoapp-script-only/setup.py` for an example. | |
133 | ||
134 | * `tag_prefix`: | |
135 | ||
136 | a string, like 'PROJECTNAME-', which appears at the start of all VCS tags. | |
137 | If your tags look like 'myproject-1.2.0', then you should use | |
138 | tag_prefix='myproject-'. If you use unprefixed tags like '1.2.0', this | |
139 | should be an empty string, using either `tag_prefix=` or `tag_prefix=''`. | |
140 | ||
141 | * `parentdir_prefix`: | |
142 | ||
143 | a optional string, frequently the same as tag_prefix, which appears at the | |
144 | start of all unpacked tarball filenames. If your tarball unpacks into | |
145 | 'myproject-1.2.0', this should be 'myproject-'. To disable this feature, | |
146 | just omit the field from your `setup.cfg`. | |
147 | ||
148 | This tool provides one script, named `versioneer`. That script has one mode, | |
149 | "install", which writes a copy of `versioneer.py` into the current directory | |
150 | and runs `versioneer.py setup` to finish the installation. | |
151 | ||
152 | To versioneer-enable your project: | |
153 | ||
154 | * 1: Modify your `setup.cfg`, adding a section named `[versioneer]` and | |
155 | populating it with the configuration values you decided earlier (note that | |
156 | the option names are not case-sensitive): | |
157 | ||
158 | ```` | |
159 | [versioneer] | |
160 | VCS = git | |
161 | style = pep440 | |
162 | versionfile_source = src/myproject/_version.py | |
163 | versionfile_build = myproject/_version.py | |
164 | tag_prefix = | |
165 | parentdir_prefix = myproject- | |
166 | ```` | |
167 | ||
168 | * 2: Run `versioneer install`. This will do the following: | |
169 | ||
170 | * copy `versioneer.py` into the top of your source tree | |
171 | * create `_version.py` in the right place (`versionfile_source`) | |
172 | * modify your `__init__.py` (if one exists next to `_version.py`) to define | |
173 | `__version__` (by calling a function from `_version.py`) | |
174 | * modify your `MANIFEST.in` to include both `versioneer.py` and the | |
175 | generated `_version.py` in sdist tarballs | |
176 | ||
177 | `versioneer install` will complain about any problems it finds with your | |
178 | `setup.py` or `setup.cfg`. Run it multiple times until you have fixed all | |
179 | the problems. | |
180 | ||
181 | * 3: add a `import versioneer` to your setup.py, and add the following | |
182 | arguments to the setup() call: | |
183 | ||
184 | version=versioneer.get_version(), | |
185 | cmdclass=versioneer.get_cmdclass(), | |
186 | ||
187 | * 4: commit these changes to your VCS. To make sure you won't forget, | |
188 | `versioneer install` will mark everything it touched for addition using | |
189 | `git add`. Don't forget to add `setup.py` and `setup.cfg` too. | |
190 | ||
191 | ## Post-Installation Usage | |
192 | ||
193 | Once established, all uses of your tree from a VCS checkout should get the | |
194 | current version string. All generated tarballs should include an embedded | |
195 | version string (so users who unpack them will not need a VCS tool installed). | |
196 | ||
197 | If you distribute your project through PyPI, then the release process should | |
198 | boil down to two steps: | |
199 | ||
200 | * 1: git tag 1.0 | |
201 | * 2: python setup.py register sdist upload | |
202 | ||
203 | If you distribute it through github (i.e. users use github to generate | |
204 | tarballs with `git archive`), the process is: | |
205 | ||
206 | * 1: git tag 1.0 | |
207 | * 2: git push; git push --tags | |
208 | ||
209 | Versioneer will report "0+untagged.NUMCOMMITS.gHASH" until your tree has at | |
210 | least one tag in its history. | |
86 | See [INSTALL.md](./INSTALL.md) for detailed installation instructions. | |
211 | 87 | |
212 | 88 | ## Version-String Flavors |
213 | 89 | |
227 | 103 | |
228 | 104 | * `['full-revisionid']`: detailed revision identifier. For Git, this is the |
229 | 105 | full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". |
106 | ||
107 | * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the | |
108 | commit date in ISO 8601 format. This will be None if the date is not | |
109 | available. | |
230 | 110 | |
231 | 111 | * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that |
232 | 112 | this is only accurate if run in a VCS checkout, otherwise it is likely to |
266 | 146 | software (exactly equal to a known tag), the identifier will only contain the |
267 | 147 | stripped tag, e.g. "0.11". |
268 | 148 | |
269 | Other styles are available. See details.md in the Versioneer source tree for | |
270 | descriptions. | |
149 | Other styles are available. See [details.md](details.md) in the Versioneer | |
150 | source tree for descriptions. | |
271 | 151 | |
272 | 152 | ## Debugging |
273 | 153 | |
277 | 157 | display the full contents of `get_versions()` (including the `error` string, |
278 | 158 | which may help identify what went wrong). |
279 | 159 | |
160 | ## Known Limitations | |
161 | ||
162 | Some situations are known to cause problems for Versioneer. This details the | |
163 | most significant ones. More can be found on Github | |
164 | [issues page](https://github.com/python-versioneer/python-versioneer/issues). | |
165 | ||
166 | ### Subprojects | |
167 | ||
168 | Versioneer has limited support for source trees in which `setup.py` is not in | |
169 | the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are | |
170 | two common reasons why `setup.py` might not be in the root: | |
171 | ||
172 | * Source trees which contain multiple subprojects, such as | |
173 | [Buildbot](https://github.com/buildbot/buildbot), which contains both | |
174 | "master" and "slave" subprojects, each with their own `setup.py`, | |
175 | `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI | |
176 | distributions (and upload multiple independently-installable tarballs). | |
177 | * Source trees whose main purpose is to contain a C library, but which also | |
178 | provide bindings to Python (and perhaps other languages) in subdirectories. | |
179 | ||
180 | Versioneer will look for `.git` in parent directories, and most operations | |
181 | should get the right version string. However `pip` and `setuptools` have bugs | |
182 | and implementation details which frequently cause `pip install .` from a | |
183 | subproject directory to fail to find a correct version string (so it usually | |
184 | defaults to `0+unknown`). | |
185 | ||
186 | `pip install --editable .` should work correctly. `setup.py install` might | |
187 | work too. | |
188 | ||
189 | Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in | |
190 | some later version. | |
191 | ||
192 | [Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking | |
193 | this issue. The discussion in | |
194 | [PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the | |
195 | issue from the Versioneer side in more detail. | |
196 | [pip PR#3176](https://github.com/pypa/pip/pull/3176) and | |
197 | [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve | |
198 | pip to let Versioneer work correctly. | |
199 | ||
200 | Versioneer-0.16 and earlier only looked for a `.git` directory next to the | |
201 | `setup.cfg`, so subprojects were completely unsupported with those releases. | |
202 | ||
203 | ### Editable installs with setuptools <= 18.5 | |
204 | ||
205 | `setup.py develop` and `pip install --editable .` allow you to install a | |
206 | project into a virtualenv once, then continue editing the source code (and | |
207 | test) without re-installing after every change. | |
208 | ||
209 | "Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a | |
210 | convenient way to specify executable scripts that should be installed along | |
211 | with the python package. | |
212 | ||
213 | These both work as expected when using modern setuptools. When using | |
214 | setuptools-18.5 or earlier, however, certain operations will cause | |
215 | `pkg_resources.DistributionNotFound` errors when running the entrypoint | |
216 | script, which must be resolved by re-installing the package. This happens | |
217 | when the install happens with one version, then the egg_info data is | |
218 | regenerated while a different version is checked out. Many setup.py commands | |
219 | cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into | |
220 | a different virtualenv), so this can be surprising. | |
221 | ||
222 | [Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes | |
223 | this one, but upgrading to a newer version of setuptools should probably | |
224 | resolve it. | |
225 | ||
226 | ||
280 | 227 | ## Updating Versioneer |
281 | 228 | |
282 | 229 | To upgrade your project to a new release of Versioneer, do the following: |
283 | 230 | |
284 | 231 | * install the new Versioneer (`pip install -U versioneer` or equivalent) |
285 | 232 | * edit `setup.cfg`, if necessary, to include any new configuration settings |
286 | indicated by the release notes | |
233 | indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. | |
287 | 234 | * re-run `versioneer install` in your source tree, to replace |
288 | 235 | `SRC/_version.py` |
289 | 236 | * commit any changed files |
290 | ||
291 | ### Upgrading to 0.16 | |
292 | ||
293 | Nothing special. | |
294 | ||
295 | ### Upgrading to 0.15 | |
296 | ||
297 | Starting with this version, Versioneer is configured with a `[versioneer]` | |
298 | section in your `setup.cfg` file. Earlier versions required the `setup.py` to | |
299 | set attributes on the `versioneer` module immediately after import. The new | |
300 | version will refuse to run (raising an exception during import) until you | |
301 | have provided the necessary `setup.cfg` section. | |
302 | ||
303 | In addition, the Versioneer package provides an executable named | |
304 | `versioneer`, and the installation process is driven by running `versioneer | |
305 | install`. In 0.14 and earlier, the executable was named | |
306 | `versioneer-installer` and was run without an argument. | |
307 | ||
308 | ### Upgrading to 0.14 | |
309 | ||
310 | 0.14 changes the format of the version string. 0.13 and earlier used | |
311 | hyphen-separated strings like "0.11-2-g1076c97-dirty". 0.14 and beyond use a | |
312 | plus-separated "local version" section strings, with dot-separated | |
313 | components, like "0.11+2.g1076c97". PEP440-strict tools did not like the old | |
314 | format, but should be ok with the new one. | |
315 | ||
316 | ### Upgrading from 0.11 to 0.12 | |
317 | ||
318 | Nothing special. | |
319 | ||
320 | ### Upgrading from 0.10 to 0.11 | |
321 | ||
322 | You must add a `versioneer.VCS = "git"` to your `setup.py` before re-running | |
323 | `setup.py setup_versioneer`. This will enable the use of additional | |
324 | version-control systems (SVN, etc) in the future. | |
325 | 237 | |
326 | 238 | ## Future Directions |
327 | 239 | |
336 | 248 | direction and include code from all supported VCS systems, reducing the |
337 | 249 | number of intermediate scripts. |
338 | 250 | |
251 | ## Similar projects | |
252 | ||
253 | * [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time | |
254 | dependency | |
255 | * [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of | |
256 | versioneer | |
257 | * [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools | |
258 | plugin | |
339 | 259 | |
340 | 260 | ## License |
341 | 261 | |
345 | 265 | Dedication" license (CC0-1.0), as described in |
346 | 266 | https://creativecommons.org/publicdomain/zero/1.0/ . |
347 | 267 | |
268 | [pypi-image]: https://img.shields.io/pypi/v/versioneer.svg | |
269 | [pypi-url]: https://pypi.python.org/pypi/versioneer/ | |
270 | [travis-image]: | |
271 | https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg | |
272 | [travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer | |
273 | ||
348 | 274 | """ |
349 | ||
350 | from __future__ import print_function | |
351 | try: | |
352 | import configparser | |
353 | except ImportError: | |
354 | import ConfigParser as configparser | |
275 | # pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring | |
276 | # pylint:disable=missing-class-docstring,too-many-branches,too-many-statements | |
277 | # pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error | |
278 | # pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with | |
279 | # pylint:disable=attribute-defined-outside-init,too-many-arguments | |
280 | ||
281 | import configparser | |
355 | 282 | import errno |
356 | 283 | import json |
357 | 284 | import os |
358 | 285 | import re |
359 | 286 | import subprocess |
360 | 287 | import sys |
288 | from typing import Callable, Dict | |
361 | 289 | |
362 | 290 | |
363 | 291 | class VersioneerConfig: |
379 | 307 | setup_py = os.path.join(root, "setup.py") |
380 | 308 | versioneer_py = os.path.join(root, "versioneer.py") |
381 | 309 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): |
382 | err = ("Versioneer was unable to run the project root directory. " | |
383 | "Versioneer requires setup.py to be executed from " | |
384 | "its immediate directory (like 'python setup.py COMMAND'), " | |
385 | "or in a way that lets it use sys.argv[0] to find the root " | |
386 | "(like 'python path/to/setup.py COMMAND').") | |
310 | err = ( | |
311 | "Versioneer was unable to run the project root directory. " | |
312 | "Versioneer requires setup.py to be executed from " | |
313 | "its immediate directory (like 'python setup.py COMMAND'), " | |
314 | "or in a way that lets it use sys.argv[0] to find the root " | |
315 | "(like 'python path/to/setup.py COMMAND')." | |
316 | ) | |
387 | 317 | raise VersioneerBadRootError(err) |
388 | 318 | try: |
389 | 319 | # Certain runtime workflows (setup.py install/develop in a setuptools |
392 | 322 | # module-import table will cache the first one. So we can't use |
393 | 323 | # os.path.dirname(__file__), as that will find whichever |
394 | 324 | # versioneer.py was first imported, even in later projects. |
395 | me = os.path.realpath(os.path.abspath(__file__)) | |
396 | if os.path.splitext(me)[0] != os.path.splitext(versioneer_py)[0]: | |
397 | print("Warning: build in %s is using versioneer.py from %s" | |
398 | % (os.path.dirname(me), versioneer_py)) | |
325 | my_path = os.path.realpath(os.path.abspath(__file__)) | |
326 | me_dir = os.path.normcase(os.path.splitext(my_path)[0]) | |
327 | vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) | |
328 | if me_dir != vsr_dir: | |
329 | print( | |
330 | "Warning: build in %s is using versioneer.py from %s" | |
331 | % (os.path.dirname(my_path), versioneer_py) | |
332 | ) | |
399 | 333 | except NameError: |
400 | 334 | pass |
401 | 335 | return root |
403 | 337 | |
404 | 338 | def get_config_from_root(root): |
405 | 339 | """Read the project setup.cfg file to determine Versioneer config.""" |
406 | # This might raise EnvironmentError (if setup.cfg is missing), or | |
340 | # This might raise OSError (if setup.cfg is missing), or | |
407 | 341 | # configparser.NoSectionError (if it lacks a [versioneer] section), or |
408 | 342 | # configparser.NoOptionError (if it lacks "VCS="). See the docstring at |
409 | 343 | # the top of versioneer.py for instructions on writing your setup.cfg . |
410 | 344 | setup_cfg = os.path.join(root, "setup.cfg") |
411 | parser = configparser.SafeConfigParser() | |
412 | with open(setup_cfg, "r") as f: | |
413 | parser.readfp(f) | |
345 | parser = configparser.ConfigParser() | |
346 | with open(setup_cfg, "r") as cfg_file: | |
347 | parser.read_file(cfg_file) | |
414 | 348 | VCS = parser.get("versioneer", "VCS") # mandatory |
415 | 349 | |
416 | def get(parser, name): | |
417 | if parser.has_option("versioneer", name): | |
418 | return parser.get("versioneer", name) | |
419 | return None | |
350 | # Dict-like interface for non-mandatory entries | |
351 | section = parser["versioneer"] | |
352 | ||
420 | 353 | cfg = VersioneerConfig() |
421 | 354 | cfg.VCS = VCS |
422 | cfg.style = get(parser, "style") or "" | |
423 | cfg.versionfile_source = get(parser, "versionfile_source") | |
424 | cfg.versionfile_build = get(parser, "versionfile_build") | |
425 | cfg.tag_prefix = get(parser, "tag_prefix") | |
355 | cfg.style = section.get("style", "") | |
356 | cfg.versionfile_source = section.get("versionfile_source") | |
357 | cfg.versionfile_build = section.get("versionfile_build") | |
358 | cfg.tag_prefix = section.get("tag_prefix") | |
426 | 359 | if cfg.tag_prefix in ("''", '""'): |
427 | 360 | cfg.tag_prefix = "" |
428 | cfg.parentdir_prefix = get(parser, "parentdir_prefix") | |
429 | cfg.verbose = get(parser, "verbose") | |
361 | cfg.parentdir_prefix = section.get("parentdir_prefix") | |
362 | cfg.verbose = section.get("verbose") | |
430 | 363 | return cfg |
431 | 364 | |
432 | 365 | |
433 | 366 | class NotThisMethod(Exception): |
434 | 367 | """Exception raised if a method is not valid for the current scenario.""" |
435 | 368 | |
369 | ||
436 | 370 | # these dictionaries contain VCS-specific tools |
437 | LONG_VERSION_PY = {} | |
438 | HANDLERS = {} | |
371 | LONG_VERSION_PY: Dict[str, str] = {} | |
372 | HANDLERS: Dict[str, Dict[str, Callable]] = {} | |
439 | 373 | |
440 | 374 | |
441 | 375 | def register_vcs_handler(vcs, method): # decorator |
442 | """Decorator to mark a method as the handler for a particular VCS.""" | |
376 | """Create decorator to mark a method as the handler of a VCS.""" | |
377 | ||
443 | 378 | def decorate(f): |
444 | 379 | """Store f in HANDLERS[vcs][method].""" |
445 | if vcs not in HANDLERS: | |
446 | HANDLERS[vcs] = {} | |
447 | HANDLERS[vcs][method] = f | |
380 | HANDLERS.setdefault(vcs, {})[method] = f | |
448 | 381 | return f |
382 | ||
449 | 383 | return decorate |
450 | 384 | |
451 | 385 | |
452 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): | |
386 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): | |
453 | 387 | """Call the given command(s).""" |
454 | 388 | assert isinstance(commands, list) |
455 | p = None | |
456 | for c in commands: | |
389 | process = None | |
390 | for command in commands: | |
457 | 391 | try: |
458 | dispcmd = str([c] + args) | |
392 | dispcmd = str([command] + args) | |
459 | 393 | # remember shell=False, so use git.cmd on windows, not just git |
460 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, | |
461 | stderr=(subprocess.PIPE if hide_stderr | |
462 | else None)) | |
394 | process = subprocess.Popen( | |
395 | [command] + args, | |
396 | cwd=cwd, | |
397 | env=env, | |
398 | stdout=subprocess.PIPE, | |
399 | stderr=(subprocess.PIPE if hide_stderr else None), | |
400 | ) | |
463 | 401 | break |
464 | except EnvironmentError: | |
402 | except OSError: | |
465 | 403 | e = sys.exc_info()[1] |
466 | 404 | if e.errno == errno.ENOENT: |
467 | 405 | continue |
468 | 406 | if verbose: |
469 | 407 | print("unable to run %s" % dispcmd) |
470 | 408 | print(e) |
471 | return None | |
409 | return None, None | |
472 | 410 | else: |
473 | 411 | if verbose: |
474 | 412 | print("unable to find command, tried %s" % (commands,)) |
475 | return None | |
476 | stdout = p.communicate()[0].strip() | |
477 | if sys.version_info[0] >= 3: | |
478 | stdout = stdout.decode() | |
479 | if p.returncode != 0: | |
413 | return None, None | |
414 | stdout = process.communicate()[0].strip().decode() | |
415 | if process.returncode != 0: | |
480 | 416 | if verbose: |
481 | 417 | print("unable to run %s (error)" % dispcmd) |
482 | return None | |
483 | return stdout | |
484 | LONG_VERSION_PY['git'] = ''' | |
418 | print("stdout was %s" % stdout) | |
419 | return None, process.returncode | |
420 | return stdout, process.returncode | |
421 | ||
422 | ||
423 | LONG_VERSION_PY[ | |
424 | "git" | |
425 | ] = r''' | |
485 | 426 | # This file helps to compute a version number in source trees obtained from |
486 | 427 | # git-archive tarball (such as those provided by githubs download-from-tag |
487 | 428 | # feature). Distribution tarballs (built by setup.py sdist) and build |
489 | 430 | # that just contains the computed version number. |
490 | 431 | |
491 | 432 | # This file is released into the public domain. Generated by |
492 | # versioneer-0.16 (https://github.com/warner/python-versioneer) | |
433 | # versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) | |
493 | 434 | |
494 | 435 | """Git implementation of _version.py.""" |
495 | 436 | |
498 | 439 | import re |
499 | 440 | import subprocess |
500 | 441 | import sys |
442 | from typing import Callable, Dict | |
501 | 443 | |
502 | 444 | |
503 | 445 | def get_keywords(): |
508 | 450 | # get_keywords(). |
509 | 451 | git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" |
510 | 452 | git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" |
511 | keywords = {"refnames": git_refnames, "full": git_full} | |
453 | git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" | |
454 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} | |
512 | 455 | return keywords |
513 | 456 | |
514 | 457 | |
534 | 477 | """Exception raised if a method is not valid for the current scenario.""" |
535 | 478 | |
536 | 479 | |
537 | LONG_VERSION_PY = {} | |
538 | HANDLERS = {} | |
480 | LONG_VERSION_PY: Dict[str, str] = {} | |
481 | HANDLERS: Dict[str, Dict[str, Callable]] = {} | |
539 | 482 | |
540 | 483 | |
541 | 484 | def register_vcs_handler(vcs, method): # decorator |
542 | """Decorator to mark a method as the handler for a particular VCS.""" | |
485 | """Create decorator to mark a method as the handler of a VCS.""" | |
543 | 486 | def decorate(f): |
544 | 487 | """Store f in HANDLERS[vcs][method].""" |
545 | 488 | if vcs not in HANDLERS: |
549 | 492 | return decorate |
550 | 493 | |
551 | 494 | |
552 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): | |
495 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, | |
496 | env=None): | |
553 | 497 | """Call the given command(s).""" |
554 | 498 | assert isinstance(commands, list) |
555 | p = None | |
556 | for c in commands: | |
499 | process = None | |
500 | for command in commands: | |
557 | 501 | try: |
558 | dispcmd = str([c] + args) | |
502 | dispcmd = str([command] + args) | |
559 | 503 | # remember shell=False, so use git.cmd on windows, not just git |
560 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, | |
561 | stderr=(subprocess.PIPE if hide_stderr | |
562 | else None)) | |
504 | process = subprocess.Popen([command] + args, cwd=cwd, env=env, | |
505 | stdout=subprocess.PIPE, | |
506 | stderr=(subprocess.PIPE if hide_stderr | |
507 | else None)) | |
563 | 508 | break |
564 | except EnvironmentError: | |
509 | except OSError: | |
565 | 510 | e = sys.exc_info()[1] |
566 | 511 | if e.errno == errno.ENOENT: |
567 | 512 | continue |
568 | 513 | if verbose: |
569 | 514 | print("unable to run %%s" %% dispcmd) |
570 | 515 | print(e) |
571 | return None | |
516 | return None, None | |
572 | 517 | else: |
573 | 518 | if verbose: |
574 | 519 | print("unable to find command, tried %%s" %% (commands,)) |
575 | return None | |
576 | stdout = p.communicate()[0].strip() | |
577 | if sys.version_info[0] >= 3: | |
578 | stdout = stdout.decode() | |
579 | if p.returncode != 0: | |
520 | return None, None | |
521 | stdout = process.communicate()[0].strip().decode() | |
522 | if process.returncode != 0: | |
580 | 523 | if verbose: |
581 | 524 | print("unable to run %%s (error)" %% dispcmd) |
582 | return None | |
583 | return stdout | |
525 | print("stdout was %%s" %% stdout) | |
526 | return None, process.returncode | |
527 | return stdout, process.returncode | |
584 | 528 | |
585 | 529 | |
586 | 530 | def versions_from_parentdir(parentdir_prefix, root, verbose): |
587 | 531 | """Try to determine the version from the parent directory name. |
588 | 532 | |
589 | Source tarballs conventionally unpack into a directory that includes | |
590 | both the project name and a version string. | |
591 | """ | |
592 | dirname = os.path.basename(root) | |
593 | if not dirname.startswith(parentdir_prefix): | |
594 | if verbose: | |
595 | print("guessing rootdir is '%%s', but '%%s' doesn't start with " | |
596 | "prefix '%%s'" %% (root, dirname, parentdir_prefix)) | |
597 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") | |
598 | return {"version": dirname[len(parentdir_prefix):], | |
599 | "full-revisionid": None, | |
600 | "dirty": False, "error": None} | |
533 | Source tarballs conventionally unpack into a directory that includes both | |
534 | the project name and a version string. We will also support searching up | |
535 | two directory levels for an appropriately named parent directory | |
536 | """ | |
537 | rootdirs = [] | |
538 | ||
539 | for _ in range(3): | |
540 | dirname = os.path.basename(root) | |
541 | if dirname.startswith(parentdir_prefix): | |
542 | return {"version": dirname[len(parentdir_prefix):], | |
543 | "full-revisionid": None, | |
544 | "dirty": False, "error": None, "date": None} | |
545 | rootdirs.append(root) | |
546 | root = os.path.dirname(root) # up a level | |
547 | ||
548 | if verbose: | |
549 | print("Tried directories %%s but none started with prefix %%s" %% | |
550 | (str(rootdirs), parentdir_prefix)) | |
551 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") | |
601 | 552 | |
602 | 553 | |
603 | 554 | @register_vcs_handler("git", "get_keywords") |
609 | 560 | # _version.py. |
610 | 561 | keywords = {} |
611 | 562 | try: |
612 | f = open(versionfile_abs, "r") | |
613 | for line in f.readlines(): | |
614 | if line.strip().startswith("git_refnames ="): | |
615 | mo = re.search(r'=\s*"(.*)"', line) | |
616 | if mo: | |
617 | keywords["refnames"] = mo.group(1) | |
618 | if line.strip().startswith("git_full ="): | |
619 | mo = re.search(r'=\s*"(.*)"', line) | |
620 | if mo: | |
621 | keywords["full"] = mo.group(1) | |
622 | f.close() | |
623 | except EnvironmentError: | |
563 | with open(versionfile_abs, "r") as fobj: | |
564 | for line in fobj: | |
565 | if line.strip().startswith("git_refnames ="): | |
566 | mo = re.search(r'=\s*"(.*)"', line) | |
567 | if mo: | |
568 | keywords["refnames"] = mo.group(1) | |
569 | if line.strip().startswith("git_full ="): | |
570 | mo = re.search(r'=\s*"(.*)"', line) | |
571 | if mo: | |
572 | keywords["full"] = mo.group(1) | |
573 | if line.strip().startswith("git_date ="): | |
574 | mo = re.search(r'=\s*"(.*)"', line) | |
575 | if mo: | |
576 | keywords["date"] = mo.group(1) | |
577 | except OSError: | |
624 | 578 | pass |
625 | 579 | return keywords |
626 | 580 | |
628 | 582 | @register_vcs_handler("git", "keywords") |
629 | 583 | def git_versions_from_keywords(keywords, tag_prefix, verbose): |
630 | 584 | """Get version information from git keywords.""" |
631 | if not keywords: | |
632 | raise NotThisMethod("no keywords at all, weird") | |
585 | if "refnames" not in keywords: | |
586 | raise NotThisMethod("Short version file found") | |
587 | date = keywords.get("date") | |
588 | if date is not None: | |
589 | # Use only the last line. Previous lines may contain GPG signature | |
590 | # information. | |
591 | date = date.splitlines()[-1] | |
592 | ||
593 | # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant | |
594 | # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 | |
595 | # -like" string, which we must then edit to make compliant), because | |
596 | # it's been around since git-1.5.3, and it's too difficult to | |
597 | # discover which version we're using, or to work around using an | |
598 | # older one. | |
599 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) | |
633 | 600 | refnames = keywords["refnames"].strip() |
634 | 601 | if refnames.startswith("$Format"): |
635 | 602 | if verbose: |
636 | 603 | print("keywords are unexpanded, not using") |
637 | 604 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") |
638 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) | |
605 | refs = {r.strip() for r in refnames.strip("()").split(",")} | |
639 | 606 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of |
640 | 607 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. |
641 | 608 | TAG = "tag: " |
642 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) | |
609 | tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} | |
643 | 610 | if not tags: |
644 | 611 | # Either we're using git < 1.8.3, or there really are no tags. We use |
645 | 612 | # a heuristic: assume all version tags have a digit. The old git %%d |
648 | 615 | # between branches and tags. By ignoring refnames without digits, we |
649 | 616 | # filter out many common branch names like "release" and |
650 | 617 | # "stabilization", as well as "HEAD" and "master". |
651 | tags = set([r for r in refs if re.search(r'\d', r)]) | |
618 | tags = {r for r in refs if re.search(r'\d', r)} | |
652 | 619 | if verbose: |
653 | print("discarding '%%s', no digits" %% ",".join(refs-tags)) | |
620 | print("discarding '%%s', no digits" %% ",".join(refs - tags)) | |
654 | 621 | if verbose: |
655 | 622 | print("likely tags: %%s" %% ",".join(sorted(tags))) |
656 | 623 | for ref in sorted(tags): |
657 | 624 | # sorting will prefer e.g. "2.0" over "2.0rc1" |
658 | 625 | if ref.startswith(tag_prefix): |
659 | 626 | r = ref[len(tag_prefix):] |
627 | # Filter out refs that exactly match prefix or that don't start | |
628 | # with a number once the prefix is stripped (mostly a concern | |
629 | # when prefix is '') | |
630 | if not re.match(r'\d', r): | |
631 | continue | |
660 | 632 | if verbose: |
661 | 633 | print("picking %%s" %% r) |
662 | 634 | return {"version": r, |
663 | 635 | "full-revisionid": keywords["full"].strip(), |
664 | "dirty": False, "error": None | |
665 | } | |
636 | "dirty": False, "error": None, | |
637 | "date": date} | |
666 | 638 | # no suitable tags, so version is "0+unknown", but full hex is still there |
667 | 639 | if verbose: |
668 | 640 | print("no suitable tags, using unknown + full revision id") |
669 | 641 | return {"version": "0+unknown", |
670 | 642 | "full-revisionid": keywords["full"].strip(), |
671 | "dirty": False, "error": "no suitable tags"} | |
643 | "dirty": False, "error": "no suitable tags", "date": None} | |
672 | 644 | |
673 | 645 | |
674 | 646 | @register_vcs_handler("git", "pieces_from_vcs") |
675 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): | |
647 | def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): | |
676 | 648 | """Get version from 'git describe' in the root of the source tree. |
677 | 649 | |
678 | 650 | This only gets called if the git-archive 'subst' keywords were *not* |
679 | 651 | expanded, and _version.py hasn't already been rewritten with a short |
680 | 652 | version string, meaning we're inside a checked out source tree. |
681 | 653 | """ |
682 | if not os.path.exists(os.path.join(root, ".git")): | |
683 | if verbose: | |
684 | print("no .git in %%s" %% root) | |
685 | raise NotThisMethod("no .git directory") | |
686 | ||
687 | 654 | GITS = ["git"] |
655 | TAG_PREFIX_REGEX = "*" | |
688 | 656 | if sys.platform == "win32": |
689 | 657 | GITS = ["git.cmd", "git.exe"] |
658 | TAG_PREFIX_REGEX = r"\*" | |
659 | ||
660 | _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, | |
661 | hide_stderr=True) | |
662 | if rc != 0: | |
663 | if verbose: | |
664 | print("Directory %%s not under git control" %% root) | |
665 | raise NotThisMethod("'git rev-parse --git-dir' returned error") | |
666 | ||
690 | 667 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] |
691 | 668 | # if there isn't one, this yields HEX[-dirty] (no NUM) |
692 | describe_out = run_command(GITS, ["describe", "--tags", "--dirty", | |
693 | "--always", "--long", | |
694 | "--match", "%%s*" %% tag_prefix], | |
695 | cwd=root) | |
669 | describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", | |
670 | "--always", "--long", | |
671 | "--match", | |
672 | "%%s%%s" %% (tag_prefix, TAG_PREFIX_REGEX)], | |
673 | cwd=root) | |
696 | 674 | # --long was added in git-1.5.5 |
697 | 675 | if describe_out is None: |
698 | 676 | raise NotThisMethod("'git describe' failed") |
699 | 677 | describe_out = describe_out.strip() |
700 | full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) | |
678 | full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) | |
701 | 679 | if full_out is None: |
702 | 680 | raise NotThisMethod("'git rev-parse' failed") |
703 | 681 | full_out = full_out.strip() |
706 | 684 | pieces["long"] = full_out |
707 | 685 | pieces["short"] = full_out[:7] # maybe improved later |
708 | 686 | pieces["error"] = None |
687 | ||
688 | branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], | |
689 | cwd=root) | |
690 | # --abbrev-ref was added in git-1.6.3 | |
691 | if rc != 0 or branch_name is None: | |
692 | raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") | |
693 | branch_name = branch_name.strip() | |
694 | ||
695 | if branch_name == "HEAD": | |
696 | # If we aren't exactly on a branch, pick a branch which represents | |
697 | # the current commit. If all else fails, we are on a branchless | |
698 | # commit. | |
699 | branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) | |
700 | # --contains was added in git-1.5.4 | |
701 | if rc != 0 or branches is None: | |
702 | raise NotThisMethod("'git branch --contains' returned error") | |
703 | branches = branches.split("\n") | |
704 | ||
705 | # Remove the first line if we're running detached | |
706 | if "(" in branches[0]: | |
707 | branches.pop(0) | |
708 | ||
709 | # Strip off the leading "* " from the list of branches. | |
710 | branches = [branch[2:] for branch in branches] | |
711 | if "master" in branches: | |
712 | branch_name = "master" | |
713 | elif not branches: | |
714 | branch_name = None | |
715 | else: | |
716 | # Pick the first branch that is returned. Good or bad. | |
717 | branch_name = branches[0] | |
718 | ||
719 | pieces["branch"] = branch_name | |
709 | 720 | |
710 | 721 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] |
711 | 722 | # TAG might have hyphens. |
723 | 734 | # TAG-NUM-gHEX |
724 | 735 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) |
725 | 736 | if not mo: |
726 | # unparseable. Maybe git-describe is misbehaving? | |
737 | # unparsable. Maybe git-describe is misbehaving? | |
727 | 738 | pieces["error"] = ("unable to parse git-describe output: '%%s'" |
728 | 739 | %% describe_out) |
729 | 740 | return pieces |
748 | 759 | else: |
749 | 760 | # HEX: no tags |
750 | 761 | pieces["closest-tag"] = None |
751 | count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], | |
752 | cwd=root) | |
762 | count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) | |
753 | 763 | pieces["distance"] = int(count_out) # total number of commits |
764 | ||
765 | # commit date: see ISO-8601 comment in git_versions_from_keywords() | |
766 | date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() | |
767 | # Use only the last line. Previous lines may contain GPG signature | |
768 | # information. | |
769 | date = date.splitlines()[-1] | |
770 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) | |
754 | 771 | |
755 | 772 | return pieces |
756 | 773 | |
787 | 804 | return rendered |
788 | 805 | |
789 | 806 | |
790 | def render_pep440_pre(pieces): | |
791 | """TAG[.post.devDISTANCE] -- No -dirty. | |
807 | def render_pep440_branch(pieces): | |
808 | """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . | |
809 | ||
810 | The ".dev0" means not master branch. Note that .dev0 sorts backwards | |
811 | (a feature branch will appear "older" than the master branch). | |
792 | 812 | |
793 | 813 | Exceptions: |
794 | 1: no tags. 0.post.devDISTANCE | |
814 | 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] | |
795 | 815 | """ |
796 | 816 | if pieces["closest-tag"]: |
797 | 817 | rendered = pieces["closest-tag"] |
818 | if pieces["distance"] or pieces["dirty"]: | |
819 | if pieces["branch"] != "master": | |
820 | rendered += ".dev0" | |
821 | rendered += plus_or_dot(pieces) | |
822 | rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) | |
823 | if pieces["dirty"]: | |
824 | rendered += ".dirty" | |
825 | else: | |
826 | # exception #1 | |
827 | rendered = "0" | |
828 | if pieces["branch"] != "master": | |
829 | rendered += ".dev0" | |
830 | rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], | |
831 | pieces["short"]) | |
832 | if pieces["dirty"]: | |
833 | rendered += ".dirty" | |
834 | return rendered | |
835 | ||
836 | ||
837 | def pep440_split_post(ver): | |
838 | """Split pep440 version string at the post-release segment. | |
839 | ||
840 | Returns the release segments before the post-release and the | |
841 | post-release version number (or -1 if no post-release segment is present). | |
842 | """ | |
843 | vc = str.split(ver, ".post") | |
844 | return vc[0], int(vc[1] or 0) if len(vc) == 2 else None | |
845 | ||
846 | ||
847 | def render_pep440_pre(pieces): | |
848 | """TAG[.postN.devDISTANCE] -- No -dirty. | |
849 | ||
850 | Exceptions: | |
851 | 1: no tags. 0.post0.devDISTANCE | |
852 | """ | |
853 | if pieces["closest-tag"]: | |
798 | 854 | if pieces["distance"]: |
799 | rendered += ".post.dev%%d" %% pieces["distance"] | |
855 | # update the post release segment | |
856 | tag_version, post_version = pep440_split_post(pieces["closest-tag"]) | |
857 | rendered = tag_version | |
858 | if post_version is not None: | |
859 | rendered += ".post%%d.dev%%d" %% (post_version+1, pieces["distance"]) | |
860 | else: | |
861 | rendered += ".post0.dev%%d" %% (pieces["distance"]) | |
862 | else: | |
863 | # no commits, use the tag as the version | |
864 | rendered = pieces["closest-tag"] | |
800 | 865 | else: |
801 | 866 | # exception #1 |
802 | rendered = "0.post.dev%%d" %% pieces["distance"] | |
867 | rendered = "0.post0.dev%%d" %% pieces["distance"] | |
803 | 868 | return rendered |
804 | 869 | |
805 | 870 | |
830 | 895 | return rendered |
831 | 896 | |
832 | 897 | |
898 | def render_pep440_post_branch(pieces): | |
899 | """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . | |
900 | ||
901 | The ".dev0" means not master branch. | |
902 | ||
903 | Exceptions: | |
904 | 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] | |
905 | """ | |
906 | if pieces["closest-tag"]: | |
907 | rendered = pieces["closest-tag"] | |
908 | if pieces["distance"] or pieces["dirty"]: | |
909 | rendered += ".post%%d" %% pieces["distance"] | |
910 | if pieces["branch"] != "master": | |
911 | rendered += ".dev0" | |
912 | rendered += plus_or_dot(pieces) | |
913 | rendered += "g%%s" %% pieces["short"] | |
914 | if pieces["dirty"]: | |
915 | rendered += ".dirty" | |
916 | else: | |
917 | # exception #1 | |
918 | rendered = "0.post%%d" %% pieces["distance"] | |
919 | if pieces["branch"] != "master": | |
920 | rendered += ".dev0" | |
921 | rendered += "+g%%s" %% pieces["short"] | |
922 | if pieces["dirty"]: | |
923 | rendered += ".dirty" | |
924 | return rendered | |
925 | ||
926 | ||
833 | 927 | def render_pep440_old(pieces): |
834 | 928 | """TAG[.postDISTANCE[.dev0]] . |
835 | 929 | |
836 | 930 | The ".dev0" means dirty. |
837 | 931 | |
838 | Eexceptions: | |
932 | Exceptions: | |
839 | 933 | 1: no tags. 0.postDISTANCE[.dev0] |
840 | 934 | """ |
841 | 935 | if pieces["closest-tag"]: |
898 | 992 | return {"version": "unknown", |
899 | 993 | "full-revisionid": pieces.get("long"), |
900 | 994 | "dirty": None, |
901 | "error": pieces["error"]} | |
995 | "error": pieces["error"], | |
996 | "date": None} | |
902 | 997 | |
903 | 998 | if not style or style == "default": |
904 | 999 | style = "pep440" # the default |
905 | 1000 | |
906 | 1001 | if style == "pep440": |
907 | 1002 | rendered = render_pep440(pieces) |
1003 | elif style == "pep440-branch": | |
1004 | rendered = render_pep440_branch(pieces) | |
908 | 1005 | elif style == "pep440-pre": |
909 | 1006 | rendered = render_pep440_pre(pieces) |
910 | 1007 | elif style == "pep440-post": |
911 | 1008 | rendered = render_pep440_post(pieces) |
1009 | elif style == "pep440-post-branch": | |
1010 | rendered = render_pep440_post_branch(pieces) | |
912 | 1011 | elif style == "pep440-old": |
913 | 1012 | rendered = render_pep440_old(pieces) |
914 | 1013 | elif style == "git-describe": |
919 | 1018 | raise ValueError("unknown style '%%s'" %% style) |
920 | 1019 | |
921 | 1020 | return {"version": rendered, "full-revisionid": pieces["long"], |
922 | "dirty": pieces["dirty"], "error": None} | |
1021 | "dirty": pieces["dirty"], "error": None, | |
1022 | "date": pieces.get("date")} | |
923 | 1023 | |
924 | 1024 | |
925 | 1025 | def get_versions(): |
943 | 1043 | # versionfile_source is the relative path from the top of the source |
944 | 1044 | # tree (where the .git directory might live) to this file. Invert |
945 | 1045 | # this to find the root from __file__. |
946 | for i in cfg.versionfile_source.split('/'): | |
1046 | for _ in cfg.versionfile_source.split('/'): | |
947 | 1047 | root = os.path.dirname(root) |
948 | 1048 | except NameError: |
949 | 1049 | return {"version": "0+unknown", "full-revisionid": None, |
950 | 1050 | "dirty": None, |
951 | "error": "unable to find root of source tree"} | |
1051 | "error": "unable to find root of source tree", | |
1052 | "date": None} | |
952 | 1053 | |
953 | 1054 | try: |
954 | 1055 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) |
964 | 1065 | |
965 | 1066 | return {"version": "0+unknown", "full-revisionid": None, |
966 | 1067 | "dirty": None, |
967 | "error": "unable to compute version"} | |
1068 | "error": "unable to compute version", "date": None} | |
968 | 1069 | ''' |
969 | 1070 | |
970 | 1071 | |
977 | 1078 | # _version.py. |
978 | 1079 | keywords = {} |
979 | 1080 | try: |
980 | f = open(versionfile_abs, "r") | |
981 | for line in f.readlines(): | |
982 | if line.strip().startswith("git_refnames ="): | |
983 | mo = re.search(r'=\s*"(.*)"', line) | |
984 | if mo: | |
985 | keywords["refnames"] = mo.group(1) | |
986 | if line.strip().startswith("git_full ="): | |
987 | mo = re.search(r'=\s*"(.*)"', line) | |
988 | if mo: | |
989 | keywords["full"] = mo.group(1) | |
990 | f.close() | |
991 | except EnvironmentError: | |
1081 | with open(versionfile_abs, "r") as fobj: | |
1082 | for line in fobj: | |
1083 | if line.strip().startswith("git_refnames ="): | |
1084 | mo = re.search(r'=\s*"(.*)"', line) | |
1085 | if mo: | |
1086 | keywords["refnames"] = mo.group(1) | |
1087 | if line.strip().startswith("git_full ="): | |
1088 | mo = re.search(r'=\s*"(.*)"', line) | |
1089 | if mo: | |
1090 | keywords["full"] = mo.group(1) | |
1091 | if line.strip().startswith("git_date ="): | |
1092 | mo = re.search(r'=\s*"(.*)"', line) | |
1093 | if mo: | |
1094 | keywords["date"] = mo.group(1) | |
1095 | except OSError: | |
992 | 1096 | pass |
993 | 1097 | return keywords |
994 | 1098 | |
996 | 1100 | @register_vcs_handler("git", "keywords") |
997 | 1101 | def git_versions_from_keywords(keywords, tag_prefix, verbose): |
998 | 1102 | """Get version information from git keywords.""" |
999 | if not keywords: | |
1000 | raise NotThisMethod("no keywords at all, weird") | |
1103 | if "refnames" not in keywords: | |
1104 | raise NotThisMethod("Short version file found") | |
1105 | date = keywords.get("date") | |
1106 | if date is not None: | |
1107 | # Use only the last line. Previous lines may contain GPG signature | |
1108 | # information. | |
1109 | date = date.splitlines()[-1] | |
1110 | ||
1111 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant | |
1112 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 | |
1113 | # -like" string, which we must then edit to make compliant), because | |
1114 | # it's been around since git-1.5.3, and it's too difficult to | |
1115 | # discover which version we're using, or to work around using an | |
1116 | # older one. | |
1117 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) | |
1001 | 1118 | refnames = keywords["refnames"].strip() |
1002 | 1119 | if refnames.startswith("$Format"): |
1003 | 1120 | if verbose: |
1004 | 1121 | print("keywords are unexpanded, not using") |
1005 | 1122 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") |
1006 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) | |
1123 | refs = {r.strip() for r in refnames.strip("()").split(",")} | |
1007 | 1124 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of |
1008 | 1125 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. |
1009 | 1126 | TAG = "tag: " |
1010 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) | |
1127 | tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} | |
1011 | 1128 | if not tags: |
1012 | 1129 | # Either we're using git < 1.8.3, or there really are no tags. We use |
1013 | 1130 | # a heuristic: assume all version tags have a digit. The old git %d |
1016 | 1133 | # between branches and tags. By ignoring refnames without digits, we |
1017 | 1134 | # filter out many common branch names like "release" and |
1018 | 1135 | # "stabilization", as well as "HEAD" and "master". |
1019 | tags = set([r for r in refs if re.search(r'\d', r)]) | |
1136 | tags = {r for r in refs if re.search(r"\d", r)} | |
1020 | 1137 | if verbose: |
1021 | print("discarding '%s', no digits" % ",".join(refs-tags)) | |
1138 | print("discarding '%s', no digits" % ",".join(refs - tags)) | |
1022 | 1139 | if verbose: |
1023 | 1140 | print("likely tags: %s" % ",".join(sorted(tags))) |
1024 | 1141 | for ref in sorted(tags): |
1025 | 1142 | # sorting will prefer e.g. "2.0" over "2.0rc1" |
1026 | 1143 | if ref.startswith(tag_prefix): |
1027 | r = ref[len(tag_prefix):] | |
1144 | r = ref[len(tag_prefix) :] | |
1145 | # Filter out refs that exactly match prefix or that don't start | |
1146 | # with a number once the prefix is stripped (mostly a concern | |
1147 | # when prefix is '') | |
1148 | if not re.match(r"\d", r): | |
1149 | continue | |
1028 | 1150 | if verbose: |
1029 | 1151 | print("picking %s" % r) |
1030 | return {"version": r, | |
1031 | "full-revisionid": keywords["full"].strip(), | |
1032 | "dirty": False, "error": None | |
1033 | } | |
1152 | return { | |
1153 | "version": r, | |
1154 | "full-revisionid": keywords["full"].strip(), | |
1155 | "dirty": False, | |
1156 | "error": None, | |
1157 | "date": date, | |
1158 | } | |
1034 | 1159 | # no suitable tags, so version is "0+unknown", but full hex is still there |
1035 | 1160 | if verbose: |
1036 | 1161 | print("no suitable tags, using unknown + full revision id") |
1037 | return {"version": "0+unknown", | |
1038 | "full-revisionid": keywords["full"].strip(), | |
1039 | "dirty": False, "error": "no suitable tags"} | |
1162 | return { | |
1163 | "version": "0+unknown", | |
1164 | "full-revisionid": keywords["full"].strip(), | |
1165 | "dirty": False, | |
1166 | "error": "no suitable tags", | |
1167 | "date": None, | |
1168 | } | |
1040 | 1169 | |
1041 | 1170 | |
1042 | 1171 | @register_vcs_handler("git", "pieces_from_vcs") |
1043 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): | |
1172 | def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): | |
1044 | 1173 | """Get version from 'git describe' in the root of the source tree. |
1045 | 1174 | |
1046 | 1175 | This only gets called if the git-archive 'subst' keywords were *not* |
1047 | 1176 | expanded, and _version.py hasn't already been rewritten with a short |
1048 | 1177 | version string, meaning we're inside a checked out source tree. |
1049 | 1178 | """ |
1050 | if not os.path.exists(os.path.join(root, ".git")): | |
1051 | if verbose: | |
1052 | print("no .git in %s" % root) | |
1053 | raise NotThisMethod("no .git directory") | |
1054 | ||
1055 | 1179 | GITS = ["git"] |
1180 | TAG_PREFIX_REGEX = "*" | |
1056 | 1181 | if sys.platform == "win32": |
1057 | 1182 | GITS = ["git.cmd", "git.exe"] |
1183 | TAG_PREFIX_REGEX = r"\*" | |
1184 | ||
1185 | _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) | |
1186 | if rc != 0: | |
1187 | if verbose: | |
1188 | print("Directory %s not under git control" % root) | |
1189 | raise NotThisMethod("'git rev-parse --git-dir' returned error") | |
1190 | ||
1058 | 1191 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] |
1059 | 1192 | # if there isn't one, this yields HEX[-dirty] (no NUM) |
1060 | describe_out = run_command(GITS, ["describe", "--tags", "--dirty", | |
1061 | "--always", "--long", | |
1062 | "--match", "%s*" % tag_prefix], | |
1063 | cwd=root) | |
1193 | describe_out, rc = runner( | |
1194 | GITS, | |
1195 | [ | |
1196 | "describe", | |
1197 | "--tags", | |
1198 | "--dirty", | |
1199 | "--always", | |
1200 | "--long", | |
1201 | "--match", | |
1202 | "%s%s" % (tag_prefix, TAG_PREFIX_REGEX), | |
1203 | ], | |
1204 | cwd=root, | |
1205 | ) | |
1064 | 1206 | # --long was added in git-1.5.5 |
1065 | 1207 | if describe_out is None: |
1066 | 1208 | raise NotThisMethod("'git describe' failed") |
1067 | 1209 | describe_out = describe_out.strip() |
1068 | full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) | |
1210 | full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) | |
1069 | 1211 | if full_out is None: |
1070 | 1212 | raise NotThisMethod("'git rev-parse' failed") |
1071 | 1213 | full_out = full_out.strip() |
1075 | 1217 | pieces["short"] = full_out[:7] # maybe improved later |
1076 | 1218 | pieces["error"] = None |
1077 | 1219 | |
1220 | branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) | |
1221 | # --abbrev-ref was added in git-1.6.3 | |
1222 | if rc != 0 or branch_name is None: | |
1223 | raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") | |
1224 | branch_name = branch_name.strip() | |
1225 | ||
1226 | if branch_name == "HEAD": | |
1227 | # If we aren't exactly on a branch, pick a branch which represents | |
1228 | # the current commit. If all else fails, we are on a branchless | |
1229 | # commit. | |
1230 | branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) | |
1231 | # --contains was added in git-1.5.4 | |
1232 | if rc != 0 or branches is None: | |
1233 | raise NotThisMethod("'git branch --contains' returned error") | |
1234 | branches = branches.split("\n") | |
1235 | ||
1236 | # Remove the first line if we're running detached | |
1237 | if "(" in branches[0]: | |
1238 | branches.pop(0) | |
1239 | ||
1240 | # Strip off the leading "* " from the list of branches. | |
1241 | branches = [branch[2:] for branch in branches] | |
1242 | if "master" in branches: | |
1243 | branch_name = "master" | |
1244 | elif not branches: | |
1245 | branch_name = None | |
1246 | else: | |
1247 | # Pick the first branch that is returned. Good or bad. | |
1248 | branch_name = branches[0] | |
1249 | ||
1250 | pieces["branch"] = branch_name | |
1251 | ||
1078 | 1252 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] |
1079 | 1253 | # TAG might have hyphens. |
1080 | 1254 | git_describe = describe_out |
1083 | 1257 | dirty = git_describe.endswith("-dirty") |
1084 | 1258 | pieces["dirty"] = dirty |
1085 | 1259 | if dirty: |
1086 | git_describe = git_describe[:git_describe.rindex("-dirty")] | |
1260 | git_describe = git_describe[: git_describe.rindex("-dirty")] | |
1087 | 1261 | |
1088 | 1262 | # now we have TAG-NUM-gHEX or HEX |
1089 | 1263 | |
1090 | 1264 | if "-" in git_describe: |
1091 | 1265 | # TAG-NUM-gHEX |
1092 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) | |
1266 | mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) | |
1093 | 1267 | if not mo: |
1094 | # unparseable. Maybe git-describe is misbehaving? | |
1095 | pieces["error"] = ("unable to parse git-describe output: '%s'" | |
1096 | % describe_out) | |
1268 | # unparsable. Maybe git-describe is misbehaving? | |
1269 | pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out | |
1097 | 1270 | return pieces |
1098 | 1271 | |
1099 | 1272 | # tag |
1102 | 1275 | if verbose: |
1103 | 1276 | fmt = "tag '%s' doesn't start with prefix '%s'" |
1104 | 1277 | print(fmt % (full_tag, tag_prefix)) |
1105 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" | |
1106 | % (full_tag, tag_prefix)) | |
1278 | pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( | |
1279 | full_tag, | |
1280 | tag_prefix, | |
1281 | ) | |
1107 | 1282 | return pieces |
1108 | pieces["closest-tag"] = full_tag[len(tag_prefix):] | |
1283 | pieces["closest-tag"] = full_tag[len(tag_prefix) :] | |
1109 | 1284 | |
1110 | 1285 | # distance: number of commits since tag |
1111 | 1286 | pieces["distance"] = int(mo.group(2)) |
1116 | 1291 | else: |
1117 | 1292 | # HEX: no tags |
1118 | 1293 | pieces["closest-tag"] = None |
1119 | count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], | |
1120 | cwd=root) | |
1294 | count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) | |
1121 | 1295 | pieces["distance"] = int(count_out) # total number of commits |
1296 | ||
1297 | # commit date: see ISO-8601 comment in git_versions_from_keywords() | |
1298 | date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() | |
1299 | # Use only the last line. Previous lines may contain GPG signature | |
1300 | # information. | |
1301 | date = date.splitlines()[-1] | |
1302 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) | |
1122 | 1303 | |
1123 | 1304 | return pieces |
1124 | 1305 | |
1127 | 1308 | """Git-specific installation logic for Versioneer. |
1128 | 1309 | |
1129 | 1310 | For Git, this means creating/changing .gitattributes to mark _version.py |
1130 | for export-time keyword substitution. | |
1311 | for export-subst keyword substitution. | |
1131 | 1312 | """ |
1132 | 1313 | GITS = ["git"] |
1133 | 1314 | if sys.platform == "win32": |
1136 | 1317 | if ipy: |
1137 | 1318 | files.append(ipy) |
1138 | 1319 | try: |
1139 | me = __file__ | |
1140 | if me.endswith(".pyc") or me.endswith(".pyo"): | |
1141 | me = os.path.splitext(me)[0] + ".py" | |
1142 | versioneer_file = os.path.relpath(me) | |
1320 | my_path = __file__ | |
1321 | if my_path.endswith(".pyc") or my_path.endswith(".pyo"): | |
1322 | my_path = os.path.splitext(my_path)[0] + ".py" | |
1323 | versioneer_file = os.path.relpath(my_path) | |
1143 | 1324 | except NameError: |
1144 | 1325 | versioneer_file = "versioneer.py" |
1145 | 1326 | files.append(versioneer_file) |
1146 | 1327 | present = False |
1147 | 1328 | try: |
1148 | f = open(".gitattributes", "r") | |
1149 | for line in f.readlines(): | |
1150 | if line.strip().startswith(versionfile_source): | |
1151 | if "export-subst" in line.strip().split()[1:]: | |
1152 | present = True | |
1153 | f.close() | |
1154 | except EnvironmentError: | |
1329 | with open(".gitattributes", "r") as fobj: | |
1330 | for line in fobj: | |
1331 | if line.strip().startswith(versionfile_source): | |
1332 | if "export-subst" in line.strip().split()[1:]: | |
1333 | present = True | |
1334 | break | |
1335 | except OSError: | |
1155 | 1336 | pass |
1156 | 1337 | if not present: |
1157 | f = open(".gitattributes", "a+") | |
1158 | f.write("%s export-subst\n" % versionfile_source) | |
1159 | f.close() | |
1338 | with open(".gitattributes", "a+") as fobj: | |
1339 | fobj.write(f"{versionfile_source} export-subst\n") | |
1160 | 1340 | files.append(".gitattributes") |
1161 | 1341 | run_command(GITS, ["add", "--"] + files) |
1162 | 1342 | |
1164 | 1344 | def versions_from_parentdir(parentdir_prefix, root, verbose): |
1165 | 1345 | """Try to determine the version from the parent directory name. |
1166 | 1346 | |
1167 | Source tarballs conventionally unpack into a directory that includes | |
1168 | both the project name and a version string. | |
1169 | """ | |
1170 | dirname = os.path.basename(root) | |
1171 | if not dirname.startswith(parentdir_prefix): | |
1172 | if verbose: | |
1173 | print("guessing rootdir is '%s', but '%s' doesn't start with " | |
1174 | "prefix '%s'" % (root, dirname, parentdir_prefix)) | |
1175 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") | |
1176 | return {"version": dirname[len(parentdir_prefix):], | |
1177 | "full-revisionid": None, | |
1178 | "dirty": False, "error": None} | |
1347 | Source tarballs conventionally unpack into a directory that includes both | |
1348 | the project name and a version string. We will also support searching up | |
1349 | two directory levels for an appropriately named parent directory | |
1350 | """ | |
1351 | rootdirs = [] | |
1352 | ||
1353 | for _ in range(3): | |
1354 | dirname = os.path.basename(root) | |
1355 | if dirname.startswith(parentdir_prefix): | |
1356 | return { | |
1357 | "version": dirname[len(parentdir_prefix) :], | |
1358 | "full-revisionid": None, | |
1359 | "dirty": False, | |
1360 | "error": None, | |
1361 | "date": None, | |
1362 | } | |
1363 | rootdirs.append(root) | |
1364 | root = os.path.dirname(root) # up a level | |
1365 | ||
1366 | if verbose: | |
1367 | print( | |
1368 | "Tried directories %s but none started with prefix %s" | |
1369 | % (str(rootdirs), parentdir_prefix) | |
1370 | ) | |
1371 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") | |
1372 | ||
1179 | 1373 | |
1180 | 1374 | SHORT_VERSION_PY = """ |
1181 | # This file was generated by 'versioneer.py' (0.16) from | |
1375 | # This file was generated by 'versioneer.py' (0.21) from | |
1182 | 1376 | # revision-control system data, or from the parent directory name of an |
1183 | 1377 | # unpacked source archive. Distribution tarballs contain a pre-generated copy |
1184 | 1378 | # of this file. |
1185 | 1379 | |
1186 | 1380 | import json |
1187 | import sys | |
1188 | 1381 | |
1189 | 1382 | version_json = ''' |
1190 | 1383 | %s |
1201 | 1394 | try: |
1202 | 1395 | with open(filename) as f: |
1203 | 1396 | contents = f.read() |
1204 | except EnvironmentError: | |
1397 | except OSError: | |
1205 | 1398 | raise NotThisMethod("unable to read _version.py") |
1206 | mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", | |
1207 | contents, re.M | re.S) | |
1399 | mo = re.search( | |
1400 | r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S | |
1401 | ) | |
1402 | if not mo: | |
1403 | mo = re.search( | |
1404 | r"version_json = '''\r\n(.*)''' # END VERSION_JSON", contents, re.M | re.S | |
1405 | ) | |
1208 | 1406 | if not mo: |
1209 | 1407 | raise NotThisMethod("no version_json in _version.py") |
1210 | 1408 | return json.loads(mo.group(1)) |
1213 | 1411 | def write_to_version_file(filename, versions): |
1214 | 1412 | """Write the given version number to the given _version.py file.""" |
1215 | 1413 | os.unlink(filename) |
1216 | contents = json.dumps(versions, sort_keys=True, | |
1217 | indent=1, separators=(",", ": ")) | |
1414 | contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) | |
1218 | 1415 | with open(filename, "w") as f: |
1219 | 1416 | f.write(SHORT_VERSION_PY % contents) |
1220 | 1417 | |
1246 | 1443 | rendered += ".dirty" |
1247 | 1444 | else: |
1248 | 1445 | # exception #1 |
1249 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], | |
1250 | pieces["short"]) | |
1446 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) | |
1251 | 1447 | if pieces["dirty"]: |
1252 | 1448 | rendered += ".dirty" |
1253 | 1449 | return rendered |
1254 | 1450 | |
1255 | 1451 | |
1256 | def render_pep440_pre(pieces): | |
1257 | """TAG[.post.devDISTANCE] -- No -dirty. | |
1452 | def render_pep440_branch(pieces): | |
1453 | """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . | |
1454 | ||
1455 | The ".dev0" means not master branch. Note that .dev0 sorts backwards | |
1456 | (a feature branch will appear "older" than the master branch). | |
1258 | 1457 | |
1259 | 1458 | Exceptions: |
1260 | 1: no tags. 0.post.devDISTANCE | |
1459 | 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] | |
1261 | 1460 | """ |
1262 | 1461 | if pieces["closest-tag"]: |
1263 | 1462 | rendered = pieces["closest-tag"] |
1463 | if pieces["distance"] or pieces["dirty"]: | |
1464 | if pieces["branch"] != "master": | |
1465 | rendered += ".dev0" | |
1466 | rendered += plus_or_dot(pieces) | |
1467 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) | |
1468 | if pieces["dirty"]: | |
1469 | rendered += ".dirty" | |
1470 | else: | |
1471 | # exception #1 | |
1472 | rendered = "0" | |
1473 | if pieces["branch"] != "master": | |
1474 | rendered += ".dev0" | |
1475 | rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) | |
1476 | if pieces["dirty"]: | |
1477 | rendered += ".dirty" | |
1478 | return rendered | |
1479 | ||
1480 | ||
1481 | def pep440_split_post(ver): | |
1482 | """Split pep440 version string at the post-release segment. | |
1483 | ||
1484 | Returns the release segments before the post-release and the | |
1485 | post-release version number (or -1 if no post-release segment is present). | |
1486 | """ | |
1487 | vc = str.split(ver, ".post") | |
1488 | return vc[0], int(vc[1] or 0) if len(vc) == 2 else None | |
1489 | ||
1490 | ||
1491 | def render_pep440_pre(pieces): | |
1492 | """TAG[.postN.devDISTANCE] -- No -dirty. | |
1493 | ||
1494 | Exceptions: | |
1495 | 1: no tags. 0.post0.devDISTANCE | |
1496 | """ | |
1497 | if pieces["closest-tag"]: | |
1264 | 1498 | if pieces["distance"]: |
1265 | rendered += ".post.dev%d" % pieces["distance"] | |
1499 | # update the post release segment | |
1500 | tag_version, post_version = pep440_split_post(pieces["closest-tag"]) | |
1501 | rendered = tag_version | |
1502 | if post_version is not None: | |
1503 | rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) | |
1504 | else: | |
1505 | rendered += ".post0.dev%d" % (pieces["distance"]) | |
1506 | else: | |
1507 | # no commits, use the tag as the version | |
1508 | rendered = pieces["closest-tag"] | |
1266 | 1509 | else: |
1267 | 1510 | # exception #1 |
1268 | rendered = "0.post.dev%d" % pieces["distance"] | |
1511 | rendered = "0.post0.dev%d" % pieces["distance"] | |
1269 | 1512 | return rendered |
1270 | 1513 | |
1271 | 1514 | |
1296 | 1539 | return rendered |
1297 | 1540 | |
1298 | 1541 | |
1542 | def render_pep440_post_branch(pieces): | |
1543 | """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . | |
1544 | ||
1545 | The ".dev0" means not master branch. | |
1546 | ||
1547 | Exceptions: | |
1548 | 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] | |
1549 | """ | |
1550 | if pieces["closest-tag"]: | |
1551 | rendered = pieces["closest-tag"] | |
1552 | if pieces["distance"] or pieces["dirty"]: | |
1553 | rendered += ".post%d" % pieces["distance"] | |
1554 | if pieces["branch"] != "master": | |
1555 | rendered += ".dev0" | |
1556 | rendered += plus_or_dot(pieces) | |
1557 | rendered += "g%s" % pieces["short"] | |
1558 | if pieces["dirty"]: | |
1559 | rendered += ".dirty" | |
1560 | else: | |
1561 | # exception #1 | |
1562 | rendered = "0.post%d" % pieces["distance"] | |
1563 | if pieces["branch"] != "master": | |
1564 | rendered += ".dev0" | |
1565 | rendered += "+g%s" % pieces["short"] | |
1566 | if pieces["dirty"]: | |
1567 | rendered += ".dirty" | |
1568 | return rendered | |
1569 | ||
1570 | ||
1299 | 1571 | def render_pep440_old(pieces): |
1300 | 1572 | """TAG[.postDISTANCE[.dev0]] . |
1301 | 1573 | |
1302 | 1574 | The ".dev0" means dirty. |
1303 | 1575 | |
1304 | Eexceptions: | |
1576 | Exceptions: | |
1305 | 1577 | 1: no tags. 0.postDISTANCE[.dev0] |
1306 | 1578 | """ |
1307 | 1579 | if pieces["closest-tag"]: |
1361 | 1633 | def render(pieces, style): |
1362 | 1634 | """Render the given version pieces into the requested style.""" |
1363 | 1635 | if pieces["error"]: |
1364 | return {"version": "unknown", | |
1365 | "full-revisionid": pieces.get("long"), | |
1366 | "dirty": None, | |
1367 | "error": pieces["error"]} | |
1636 | return { | |
1637 | "version": "unknown", | |
1638 | "full-revisionid": pieces.get("long"), | |
1639 | "dirty": None, | |
1640 | "error": pieces["error"], | |
1641 | "date": None, | |
1642 | } | |
1368 | 1643 | |
1369 | 1644 | if not style or style == "default": |
1370 | 1645 | style = "pep440" # the default |
1371 | 1646 | |
1372 | 1647 | if style == "pep440": |
1373 | 1648 | rendered = render_pep440(pieces) |
1649 | elif style == "pep440-branch": | |
1650 | rendered = render_pep440_branch(pieces) | |
1374 | 1651 | elif style == "pep440-pre": |
1375 | 1652 | rendered = render_pep440_pre(pieces) |
1376 | 1653 | elif style == "pep440-post": |
1377 | 1654 | rendered = render_pep440_post(pieces) |
1655 | elif style == "pep440-post-branch": | |
1656 | rendered = render_pep440_post_branch(pieces) | |
1378 | 1657 | elif style == "pep440-old": |
1379 | 1658 | rendered = render_pep440_old(pieces) |
1380 | 1659 | elif style == "git-describe": |
1384 | 1663 | else: |
1385 | 1664 | raise ValueError("unknown style '%s'" % style) |
1386 | 1665 | |
1387 | return {"version": rendered, "full-revisionid": pieces["long"], | |
1388 | "dirty": pieces["dirty"], "error": None} | |
1666 | return { | |
1667 | "version": rendered, | |
1668 | "full-revisionid": pieces["long"], | |
1669 | "dirty": pieces["dirty"], | |
1670 | "error": None, | |
1671 | "date": pieces.get("date"), | |
1672 | } | |
1389 | 1673 | |
1390 | 1674 | |
1391 | 1675 | class VersioneerBadRootError(Exception): |
1408 | 1692 | handlers = HANDLERS.get(cfg.VCS) |
1409 | 1693 | assert handlers, "unrecognized VCS '%s'" % cfg.VCS |
1410 | 1694 | verbose = verbose or cfg.verbose |
1411 | assert cfg.versionfile_source is not None, \ | |
1412 | "please set versioneer.versionfile_source" | |
1695 | assert ( | |
1696 | cfg.versionfile_source is not None | |
1697 | ), "please set versioneer.versionfile_source" | |
1413 | 1698 | assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" |
1414 | 1699 | |
1415 | 1700 | versionfile_abs = os.path.join(root, cfg.versionfile_source) |
1463 | 1748 | if verbose: |
1464 | 1749 | print("unable to compute version") |
1465 | 1750 | |
1466 | return {"version": "0+unknown", "full-revisionid": None, | |
1467 | "dirty": None, "error": "unable to compute version"} | |
1751 | return { | |
1752 | "version": "0+unknown", | |
1753 | "full-revisionid": None, | |
1754 | "dirty": None, | |
1755 | "error": "unable to compute version", | |
1756 | "date": None, | |
1757 | } | |
1468 | 1758 | |
1469 | 1759 | |
1470 | 1760 | def get_version(): |
1472 | 1762 | return get_versions()["version"] |
1473 | 1763 | |
1474 | 1764 | |
1475 | def get_cmdclass(): | |
1476 | """Get the custom setuptools/distutils subclasses used by Versioneer.""" | |
1765 | def get_cmdclass(cmdclass=None): | |
1766 | """Get the custom setuptools/distutils subclasses used by Versioneer. | |
1767 | ||
1768 | If the package uses a different cmdclass (e.g. one from numpy), it | |
1769 | should be provide as an argument. | |
1770 | """ | |
1477 | 1771 | if "versioneer" in sys.modules: |
1478 | 1772 | del sys.modules["versioneer"] |
1479 | 1773 | # this fixes the "python setup.py develop" case (also 'install' and |
1487 | 1781 | # parent is protected against the child's "import versioneer". By |
1488 | 1782 | # removing ourselves from sys.modules here, before the child build |
1489 | 1783 | # happens, we protect the child from the parent's versioneer too. |
1490 | # Also see https://github.com/warner/python-versioneer/issues/52 | |
1491 | ||
1492 | cmds = {} | |
1784 | # Also see https://github.com/python-versioneer/python-versioneer/issues/52 | |
1785 | ||
1786 | cmds = {} if cmdclass is None else cmdclass.copy() | |
1493 | 1787 | |
1494 | 1788 | # we add "version" to both distutils and setuptools |
1495 | 1789 | from distutils.core import Command |
1510 | 1804 | print("Version: %s" % vers["version"]) |
1511 | 1805 | print(" full-revisionid: %s" % vers.get("full-revisionid")) |
1512 | 1806 | print(" dirty: %s" % vers.get("dirty")) |
1807 | print(" date: %s" % vers.get("date")) | |
1513 | 1808 | if vers["error"]: |
1514 | 1809 | print(" error: %s" % vers["error"]) |
1810 | ||
1515 | 1811 | cmds["version"] = cmd_version |
1516 | 1812 | |
1517 | 1813 | # we override "build_py" in both distutils and setuptools |
1523 | 1819 | # setuptools/bdist_egg -> distutils/install_lib -> build_py |
1524 | 1820 | # setuptools/install -> bdist_egg ->.. |
1525 | 1821 | # setuptools/develop -> ? |
1822 | # pip install: | |
1823 | # copies source tree to a tempdir before running egg_info/etc | |
1824 | # if .git isn't copied too, 'git describe' will fail | |
1825 | # then does setup.py bdist_wheel, or sometimes setup.py install | |
1826 | # setup.py egg_info -> ? | |
1526 | 1827 | |
1527 | 1828 | # we override different "build_py" commands for both environments |
1528 | if "setuptools" in sys.modules: | |
1829 | if "build_py" in cmds: | |
1830 | _build_py = cmds["build_py"] | |
1831 | elif "setuptools" in sys.modules: | |
1529 | 1832 | from setuptools.command.build_py import build_py as _build_py |
1530 | 1833 | else: |
1531 | 1834 | from distutils.command.build_py import build_py as _build_py |
1539 | 1842 | # now locate _version.py in the new build/ directory and replace |
1540 | 1843 | # it with an updated value |
1541 | 1844 | if cfg.versionfile_build: |
1542 | target_versionfile = os.path.join(self.build_lib, | |
1543 | cfg.versionfile_build) | |
1845 | target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) | |
1544 | 1846 | print("UPDATING %s" % target_versionfile) |
1545 | 1847 | write_to_version_file(target_versionfile, versions) |
1848 | ||
1546 | 1849 | cmds["build_py"] = cmd_build_py |
1850 | ||
1851 | if "build_ext" in cmds: | |
1852 | _build_ext = cmds["build_ext"] | |
1853 | elif "setuptools" in sys.modules: | |
1854 | from setuptools.command.build_ext import build_ext as _build_ext | |
1855 | else: | |
1856 | from distutils.command.build_ext import build_ext as _build_ext | |
1857 | ||
1858 | class cmd_build_ext(_build_ext): | |
1859 | def run(self): | |
1860 | root = get_root() | |
1861 | cfg = get_config_from_root(root) | |
1862 | versions = get_versions() | |
1863 | _build_ext.run(self) | |
1864 | if self.inplace: | |
1865 | # build_ext --inplace will only build extensions in | |
1866 | # build/lib<..> dir with no _version.py to write to. | |
1867 | # As in place builds will already have a _version.py | |
1868 | # in the module dir, we do not need to write one. | |
1869 | return | |
1870 | # now locate _version.py in the new build/ directory and replace | |
1871 | # it with an updated value | |
1872 | target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) | |
1873 | print("UPDATING %s" % target_versionfile) | |
1874 | write_to_version_file(target_versionfile, versions) | |
1875 | ||
1876 | cmds["build_ext"] = cmd_build_ext | |
1547 | 1877 | |
1548 | 1878 | if "cx_Freeze" in sys.modules: # cx_freeze enabled? |
1549 | 1879 | from cx_Freeze.dist import build_exe as _build_exe |
1880 | ||
1881 | # nczeczulin reports that py2exe won't like the pep440-style string | |
1882 | # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. | |
1883 | # setup(console=[{ | |
1884 | # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION | |
1885 | # "product_version": versioneer.get_version(), | |
1886 | # ... | |
1550 | 1887 | |
1551 | 1888 | class cmd_build_exe(_build_exe): |
1552 | 1889 | def run(self): |
1561 | 1898 | os.unlink(target_versionfile) |
1562 | 1899 | with open(cfg.versionfile_source, "w") as f: |
1563 | 1900 | LONG = LONG_VERSION_PY[cfg.VCS] |
1564 | f.write(LONG % | |
1565 | {"DOLLAR": "$", | |
1566 | "STYLE": cfg.style, | |
1567 | "TAG_PREFIX": cfg.tag_prefix, | |
1568 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, | |
1569 | "VERSIONFILE_SOURCE": cfg.versionfile_source, | |
1570 | }) | |
1901 | f.write( | |
1902 | LONG | |
1903 | % { | |
1904 | "DOLLAR": "$", | |
1905 | "STYLE": cfg.style, | |
1906 | "TAG_PREFIX": cfg.tag_prefix, | |
1907 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, | |
1908 | "VERSIONFILE_SOURCE": cfg.versionfile_source, | |
1909 | } | |
1910 | ) | |
1911 | ||
1571 | 1912 | cmds["build_exe"] = cmd_build_exe |
1572 | 1913 | del cmds["build_py"] |
1573 | 1914 | |
1915 | if "py2exe" in sys.modules: # py2exe enabled? | |
1916 | from py2exe.distutils_buildexe import py2exe as _py2exe | |
1917 | ||
1918 | class cmd_py2exe(_py2exe): | |
1919 | def run(self): | |
1920 | root = get_root() | |
1921 | cfg = get_config_from_root(root) | |
1922 | versions = get_versions() | |
1923 | target_versionfile = cfg.versionfile_source | |
1924 | print("UPDATING %s" % target_versionfile) | |
1925 | write_to_version_file(target_versionfile, versions) | |
1926 | ||
1927 | _py2exe.run(self) | |
1928 | os.unlink(target_versionfile) | |
1929 | with open(cfg.versionfile_source, "w") as f: | |
1930 | LONG = LONG_VERSION_PY[cfg.VCS] | |
1931 | f.write( | |
1932 | LONG | |
1933 | % { | |
1934 | "DOLLAR": "$", | |
1935 | "STYLE": cfg.style, | |
1936 | "TAG_PREFIX": cfg.tag_prefix, | |
1937 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, | |
1938 | "VERSIONFILE_SOURCE": cfg.versionfile_source, | |
1939 | } | |
1940 | ) | |
1941 | ||
1942 | cmds["py2exe"] = cmd_py2exe | |
1943 | ||
1574 | 1944 | # we override different "sdist" commands for both environments |
1575 | if "setuptools" in sys.modules: | |
1945 | if "sdist" in cmds: | |
1946 | _sdist = cmds["sdist"] | |
1947 | elif "setuptools" in sys.modules: | |
1576 | 1948 | from setuptools.command.sdist import sdist as _sdist |
1577 | 1949 | else: |
1578 | 1950 | from distutils.command.sdist import sdist as _sdist |
1595 | 1967 | # updated value |
1596 | 1968 | target_versionfile = os.path.join(base_dir, cfg.versionfile_source) |
1597 | 1969 | print("UPDATING %s" % target_versionfile) |
1598 | write_to_version_file(target_versionfile, | |
1599 | self._versioneer_generated_versions) | |
1970 | write_to_version_file( | |
1971 | target_versionfile, self._versioneer_generated_versions | |
1972 | ) | |
1973 | ||
1600 | 1974 | cmds["sdist"] = cmd_sdist |
1601 | 1975 | |
1602 | 1976 | return cmds |
1639 | 2013 | |
1640 | 2014 | """ |
1641 | 2015 | |
1642 | INIT_PY_SNIPPET = """ | |
2016 | OLD_SNIPPET = """ | |
1643 | 2017 | from ._version import get_versions |
1644 | 2018 | __version__ = get_versions()['version'] |
1645 | 2019 | del get_versions |
1646 | 2020 | """ |
1647 | 2021 | |
2022 | INIT_PY_SNIPPET = """ | |
2023 | from . import {0} | |
2024 | __version__ = {0}.get_versions()['version'] | |
2025 | """ | |
2026 | ||
1648 | 2027 | |
1649 | 2028 | def do_setup(): |
1650 | """Main VCS-independent setup function for installing Versioneer.""" | |
2029 | """Do main VCS-independent setup function for installing Versioneer.""" | |
1651 | 2030 | root = get_root() |
1652 | 2031 | try: |
1653 | 2032 | cfg = get_config_from_root(root) |
1654 | except (EnvironmentError, configparser.NoSectionError, | |
1655 | configparser.NoOptionError) as e: | |
1656 | if isinstance(e, (EnvironmentError, configparser.NoSectionError)): | |
1657 | print("Adding sample versioneer config to setup.cfg", | |
1658 | file=sys.stderr) | |
2033 | except (OSError, configparser.NoSectionError, configparser.NoOptionError) as e: | |
2034 | if isinstance(e, (OSError, configparser.NoSectionError)): | |
2035 | print("Adding sample versioneer config to setup.cfg", file=sys.stderr) | |
1659 | 2036 | with open(os.path.join(root, "setup.cfg"), "a") as f: |
1660 | 2037 | f.write(SAMPLE_CONFIG) |
1661 | 2038 | print(CONFIG_ERROR, file=sys.stderr) |
1664 | 2041 | print(" creating %s" % cfg.versionfile_source) |
1665 | 2042 | with open(cfg.versionfile_source, "w") as f: |
1666 | 2043 | LONG = LONG_VERSION_PY[cfg.VCS] |
1667 | f.write(LONG % {"DOLLAR": "$", | |
1668 | "STYLE": cfg.style, | |
1669 | "TAG_PREFIX": cfg.tag_prefix, | |
1670 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, | |
1671 | "VERSIONFILE_SOURCE": cfg.versionfile_source, | |
1672 | }) | |
1673 | ||
1674 | ipy = os.path.join(os.path.dirname(cfg.versionfile_source), | |
1675 | "__init__.py") | |
2044 | f.write( | |
2045 | LONG | |
2046 | % { | |
2047 | "DOLLAR": "$", | |
2048 | "STYLE": cfg.style, | |
2049 | "TAG_PREFIX": cfg.tag_prefix, | |
2050 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, | |
2051 | "VERSIONFILE_SOURCE": cfg.versionfile_source, | |
2052 | } | |
2053 | ) | |
2054 | ||
2055 | ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") | |
1676 | 2056 | if os.path.exists(ipy): |
1677 | 2057 | try: |
1678 | 2058 | with open(ipy, "r") as f: |
1679 | 2059 | old = f.read() |
1680 | except EnvironmentError: | |
2060 | except OSError: | |
1681 | 2061 | old = "" |
1682 | if INIT_PY_SNIPPET not in old: | |
2062 | module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] | |
2063 | snippet = INIT_PY_SNIPPET.format(module) | |
2064 | if OLD_SNIPPET in old: | |
2065 | print(" replacing boilerplate in %s" % ipy) | |
2066 | with open(ipy, "w") as f: | |
2067 | f.write(old.replace(OLD_SNIPPET, snippet)) | |
2068 | elif snippet not in old: | |
1683 | 2069 | print(" appending to %s" % ipy) |
1684 | 2070 | with open(ipy, "a") as f: |
1685 | f.write(INIT_PY_SNIPPET) | |
2071 | f.write(snippet) | |
1686 | 2072 | else: |
1687 | 2073 | print(" %s unmodified" % ipy) |
1688 | 2074 | else: |
1701 | 2087 | if line.startswith("include "): |
1702 | 2088 | for include in line.split()[1:]: |
1703 | 2089 | simple_includes.add(include) |
1704 | except EnvironmentError: | |
2090 | except OSError: | |
1705 | 2091 | pass |
1706 | 2092 | # That doesn't cover everything MANIFEST.in can do |
1707 | 2093 | # (http://docs.python.org/2/distutils/sourcedist.html#commands), so |
1714 | 2100 | else: |
1715 | 2101 | print(" 'versioneer.py' already in MANIFEST.in") |
1716 | 2102 | if cfg.versionfile_source not in simple_includes: |
1717 | print(" appending versionfile_source ('%s') to MANIFEST.in" % | |
1718 | cfg.versionfile_source) | |
2103 | print( | |
2104 | " appending versionfile_source ('%s') to MANIFEST.in" | |
2105 | % cfg.versionfile_source | |
2106 | ) | |
1719 | 2107 | with open(manifest_in, "a") as f: |
1720 | 2108 | f.write("include %s\n" % cfg.versionfile_source) |
1721 | 2109 | else: |
1722 | 2110 | print(" versionfile_source already in MANIFEST.in") |
1723 | 2111 | |
1724 | 2112 | # Make VCS-specific changes. For git, this means creating/changing |
1725 | # .gitattributes to mark _version.py for export-time keyword | |
2113 | # .gitattributes to mark _version.py for export-subst keyword | |
1726 | 2114 | # substitution. |
1727 | 2115 | do_vcs_install(manifest_in, cfg.versionfile_source, ipy) |
1728 | 2116 | return 0 |
1764 | 2152 | errors += 1 |
1765 | 2153 | return errors |
1766 | 2154 | |
2155 | ||
1767 | 2156 | if __name__ == "__main__": |
1768 | 2157 | cmd = sys.argv[1] |
1769 | 2158 | if cmd == "setup": |