[ Debian Janitor ]
New upstream snapshot.
Debian Janitor
2 years ago
0 | 1.4.4 (unreleased) | |
1 | ------------------ | |
2 | ||
3 | - Add nextUntil method | |
4 | ||
5 | ||
6 | 1.4.3 (2020-11-21) | |
7 | ------------------ | |
8 | ||
9 | - No longer use a universal wheel | |
10 | ||
11 | ||
12 | 1.4.2 (2020-11-21) | |
13 | ------------------ | |
14 | ||
15 | - Fix exception raised when calling `PyQuery("<textarea></textarea>").text()` | |
16 | ||
17 | - python2 is no longer supported | |
18 | ||
19 | 1.4.1 (2019-10-26) | |
20 | ------------------ | |
21 | ||
22 | - This is the latest release with py2 support | |
23 | ||
24 | - Remove py33, py34 support | |
25 | ||
26 | - web scraping improvements: default timeout and session support | |
27 | ||
28 | - Add API methods to serialize form-related elements according to spec | |
29 | ||
30 | - Include HTML markup when querying textarea text/value | |
31 | ||
32 | ||
33 | 1.4.0 (2018-01-11) | |
34 | ------------------ | |
35 | ||
36 | - Refactoring of `.text()` to match firefox behavior. | |
37 | ||
38 | ||
39 | 1.3.0 (2017-10-21) | |
40 | ------------------ | |
41 | ||
42 | - Remove some unmaintained modules: ``pyquery.ajax`` and ``pyquery.rules`` | |
43 | ||
44 | - Code cleanup. No longer use ugly hacks required by python2.6/python3.2. | |
45 | ||
46 | - Run tests with python3.6 on CI | |
47 | ||
48 | - Add a ``method`` argument to ``.outer_html()`` | |
49 | ||
50 | ||
51 | 1.2.17 (2016-10-14) | |
52 | ------------------- | |
53 | ||
54 | - ``PyQuery('<input value="">').val()`` is ``''`` | |
55 | - ``PyQuery('<input>').val()`` is ``''`` | |
56 | ||
57 | ||
58 | 1.2.16 (2016-10-14) | |
59 | ------------------- | |
60 | ||
61 | - ``.attr('value', '')`` no longer removes the ``value`` attribute | |
62 | ||
63 | - ``<input type="checkbox">`` without ``value="..."`` have a ``.val()`` of | |
64 | ``'on'`` | |
65 | ||
66 | - ``<input type="radio">`` without ``value="..."`` have a ``.val()`` of | |
67 | ``'on'`` | |
68 | ||
69 | - ``<select>`` without ``<option selected>`` have the value of their first | |
70 | ``<option>`` (or ``None`` if there are no options) | |
71 | ||
72 | ||
73 | 1.2.15 (2016-10-11) | |
74 | ------------------- | |
75 | ||
76 | - .val() should never raise | |
77 | ||
78 | - drop py26 support | |
79 | ||
80 | - improve .extend() by returning self | |
81 | ||
82 | ||
83 | 1.2.14 (2016-10-10) | |
84 | ------------------- | |
85 | ||
86 | - fix val() for <textarea> and <select>, to match jQuery behavior | |
87 | ||
88 | ||
89 | 1.2.13 (2016-04-12) | |
90 | ------------------- | |
91 | ||
92 | - Note explicit support for Python 3.5 | |
93 | ||
94 | 1.2.12 (2016-04-12) | |
95 | ------------------- | |
96 | ||
97 | make_links_absolute now takes care of whitespace | |
98 | ||
99 | - added pseudo selector :has() | |
100 | ||
101 | - add cookies arguments as allowed arguments for requests | |
102 | ||
103 | ||
104 | 1.2.11 (2016-02-02) | |
105 | ------------------- | |
106 | ||
107 | - Preserve namespaces attribute on PyQuery copies. | |
108 | ||
109 | - Do not raise an error when the http response code is 2XX | |
110 | ||
111 | 1.2.10 (2016-01-05) | |
112 | ------------------- | |
113 | ||
114 | - Fixed #118: implemented usage ``lxml.etree.tostring`` within ``outer_html`` method | |
115 | ||
116 | - Fixed #117: Raise HTTP Error if HTTP status code is not equal to 200 | |
117 | ||
118 | - Fixed #112: make_links_absolute does not apply to form actions | |
119 | ||
120 | Fixed #98: contains acts like jQuery | |
121 | ||
122 | ||
0 | 123 | 1.2.9 (2014-08-22) |
1 | 124 | ------------------ |
2 | 125 | |
39 | 162 | 1.2.6 (2013-10-11) |
40 | 163 | ------------------ |
41 | 164 | |
42 | README_fixt.py was not include in the release. Fix #54. | |
165 | - README_fixt.py was not included in the release. Fix #54. | |
43 | 166 | |
44 | 167 | |
45 | 168 | 1.2.5 (2013-10-10) |
46 | 169 | ------------------ |
47 | 170 | |
48 | cssselect compat. See https://github.com/SimonSapin/cssselect/pull/22 | |
49 | ||
50 | tests improvments. no longer require a eth connection. | |
51 | ||
52 | fix #55 | |
171 | - cssselect compat. See https://github.com/SimonSapin/cssselect/pull/22 | |
172 | ||
173 | - tests improvements. no longer require an eth connection. | |
174 | ||
175 | - fix #55 | |
53 | 176 | |
54 | 177 | 1.2.4 |
55 | 178 | ----- |
56 | 179 | |
57 | Moved to github. So a few files are renamed from .txt to .rst | |
58 | ||
59 | Added .xhtml_to_html() and .remove_namespaces() | |
60 | ||
61 | Use requests to fetch urls (if available) | |
62 | ||
63 | Use restkit's proxy instead of Paste (which will die with py3) | |
64 | ||
65 | Allow to open https urls | |
66 | ||
67 | python2.5 is no longer supported (may work, but tests are broken) | |
180 | - Moved to github. So a few files are renamed from .txt to .rst | |
181 | ||
182 | - Added .xhtml_to_html() and .remove_namespaces() | |
183 | ||
184 | - Use requests to fetch urls (if available) | |
185 | ||
186 | - Use restkit's proxy instead of Paste (which will die with py3) | |
187 | ||
188 | - Allow to open https urls | |
189 | ||
190 | - python2.5 is no longer supported (may work, but tests are broken) | |
68 | 191 | |
69 | 192 | 1.2.3 |
70 | 193 | ----- |
71 | 194 | |
72 | Allow to pass this in .filter() callback | |
73 | ||
74 | Add .contents() .items() | |
75 | ||
76 | Add tox.ini | |
77 | ||
78 | Bug fixes: fix #35 #55 #64 #66 | |
195 | - Allow to pass this in .filter() callback | |
196 | ||
197 | - Add .contents() .items() | |
198 | ||
199 | - Add tox.ini | |
200 | ||
201 | - Bug fixes: fix #35 #55 #64 #66 | |
79 | 202 | |
80 | 203 | 1.2.2 |
81 | 204 | ----- |
82 | 205 | |
83 | Fix cssselectpatch to match the newer implementation of cssselect. Fixes issue #62, #52 and #59 (Haoyu Bai) | |
84 | ||
85 | Fix issue #37 (Caleb Burns) | |
206 | - Fix cssselectpatch to match the newer implementation of cssselect. Fixes issue #62, #52 and #59 (Haoyu Bai) | |
207 | ||
208 | - Fix issue #37 (Caleb Burns) | |
86 | 209 | |
87 | 210 | 1.2.1 |
88 | 211 | ----- |
89 | 212 | |
90 | Allow to use a custom css translator. | |
91 | ||
92 | Fix issue 44: case problem with xml documents | |
213 | - Allow to use a custom css translator. | |
214 | ||
215 | - Fix issue 44: case problem with xml documents | |
93 | 216 | |
94 | 217 | 1.2 |
95 | 218 | --- |
96 | 219 | |
97 | PyQuery now use `cssselect <http://pypi.python.org/pypi/cssselect>`_. See issue | |
98 | 43. | |
99 | ||
100 | Fix issue 40: forward .html() extra arguments to ``lxml.etree.tostring`` | |
220 | - PyQuery now uses `cssselect <http://pypi.python.org/pypi/cssselect>`_. See issue 43. | |
221 | ||
222 | - Fix issue 40: forward .html() extra arguments to ``lxml.etree.tostring`` | |
101 | 223 | |
102 | 224 | 1.1.1 |
103 | 225 | ----- |
104 | 226 | |
105 | Minor release. Include test file so you can run tests from the tarball. | |
227 | - Minor release. Include test file so you can run tests from the tarball. | |
106 | 228 | |
107 | 229 | |
108 | 230 | 1.1 |
109 | 231 | --- |
110 | 232 | |
111 | fix issues 30, 31, 32 - py3 improvements / webob 1.2+ support | |
233 | - fix issues 30, 31, 32 - py3 improvements / webob 1.2+ support | |
112 | 234 | |
113 | 235 | |
114 | 236 | 1.0 |
115 | 237 | --- |
116 | 238 | |
117 | fix issues 24 | |
239 | - fix issues 24 | |
118 | 240 | |
119 | 241 | 0.7 |
120 | 242 | --- |
121 | 243 | |
122 | Python 3 compatible | |
123 | ||
124 | Add __unicode__ method | |
125 | ||
126 | Add root and encoding attribute | |
127 | ||
128 | fix issues 19, 20, 22, 23 | |
244 | - Python 3 compatible | |
245 | ||
246 | - Add __unicode__ method | |
247 | ||
248 | - Add root and encoding attribute | |
249 | ||
250 | - fix issues 19, 20, 22, 23 | |
129 | 251 | |
130 | 252 | 0.6.1 |
131 | 253 | ------ |
132 | 254 | |
133 | Move README.txt at package root | |
134 | ||
135 | Add CHANGES.txt and add it to long_description | |
255 | - Move README.txt at package root | |
256 | ||
257 | - Add CHANGES.txt and add it to long_description | |
136 | 258 | |
137 | 259 | 0.6 |
138 | 260 | ---- |
139 | 261 | |
140 | Added PyQuery.outerHtml | |
141 | ||
142 | Added PyQuery.fn | |
143 | ||
144 | Added PyQuery.map | |
145 | ||
146 | Change PyQuery.each behavior to reflect jQuery api | |
147 | ||
148 | ||
262 | - Added PyQuery.outerHtml | |
263 | ||
264 | - Added PyQuery.fn | |
265 | ||
266 | - Added PyQuery.map | |
267 | ||
268 | - Change PyQuery.each behavior to reflect jQuery api | |
269 | ||
270 |
0 | Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com> | |
1 | ||
2 | Redistribution and use in source and binary forms, with or without | |
3 | modification, are permitted provided that the following conditions are | |
4 | met: | |
5 | ||
6 | 1. Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | ||
9 | 2. Redistributions in binary form must reproduce the above copyright | |
10 | notice, this list of conditions and the following disclaimer in | |
11 | the documentation and/or other materials provided with the | |
12 | distribution. | |
13 | ||
14 | 3. Neither the name of Infrae nor the names of its contributors may | |
15 | be used to endorse or promote products derived from this software | |
16 | without specific prior written permission. | |
17 | ||
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INFRAE OR | |
22 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
23 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
24 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
25 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
26 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
27 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
1 | 1 | prune docs/_build |
2 | 2 | graft pyquery |
3 | 3 | graft tests |
4 | include *.py | |
5 | include *.txt | |
4 | 6 | include *_fixt.py *.rst *.cfg *.ini |
5 | 7 | global-exclude *.pyc |
6 | 8 | global-exclude __pycache__ |
0 | Metadata-Version: 1.1 | |
0 | Metadata-Version: 2.1 | |
1 | 1 | Name: pyquery |
2 | Version: 1.2.9 | |
2 | Version: 1.4.4.dev0 | |
3 | 3 | Summary: A jquery-like library for python |
4 | 4 | Home-page: https://github.com/gawel/pyquery |
5 | Author: Gael Pasgrimaud | |
6 | Author-email: gael@gawel.org | |
5 | Author: Olivier Lauzanne | |
6 | Author-email: olauzanne@gmail.com | |
7 | Maintainer: Gael Pasgrimaud | |
8 | Maintainer-email: gael@gawel.org | |
7 | 9 | License: BSD |
8 | 10 | Description: |
9 | 11 | pyquery: a jquery-like library for python |
10 | 12 | ========================================= |
11 | 13 | |
14 | .. image:: https://travis-ci.org/gawel/pyquery.svg | |
15 | :alt: Build Status | |
16 | :target: https://travis-ci.org/gawel/pyquery | |
17 | ||
12 | 18 | pyquery allows you to make jquery queries on xml documents. |
13 | 19 | The API is as much as possible the similar to jquery. pyquery uses lxml for fast |
14 | 20 | xml and html manipulation. |
19 | 25 | |
20 | 26 | The `project`_ is being actively developed on a git repository on Github. I
21 | 27 | have the policy of giving push access to anyone who wants it and then to review |
22 | what he does. So if you want to contribute just email me. | |
28 | what they do. So if you want to contribute just email me. | |
23 | 29 | |
24 | 30 | Please report bugs on the `github |
25 | 31 | <https://github.com/gawel/pyquery/issues>`_ issue |
27 | 33 | |
28 | 34 | .. _deliverance: http://www.gawel.org/weblog/en/2008/12/skinning-with-pyquery-and-deliverance |
29 | 35 | .. _project: https://github.com/gawel/pyquery/ |
36 | ||
37 | I've spent hours maintaining this software, with love. | |
38 | Please consider tipping if you like it: | |
39 | ||
40 | BTC: 1PruQAwByDndFZ7vTeJhyWefAghaZx9RZg | |
41 | ||
42 | ETH: 0xb6418036d8E06c60C4D91c17d72Df6e1e5b15CE6 | |
43 | ||
44 | LTC: LY6CdZcDbxnBX9GFBJ45TqVj8NykBBqsmT | |
45 | ||
46 | .. | |
47 | >>> (urlopen, your_url, path_to_html_file) = getfixture('readme_fixt') | |
30 | 48 | |
31 | 49 | Quickstart |
32 | 50 | ========== |
72 | 90 | News |
73 | 91 | ==== |
74 | 92 | |
93 | 1.4.4 (unreleased) | |
94 | ------------------ | |
95 | ||
96 | - Add nextUntil method | |
97 | ||
98 | ||
99 | 1.4.3 (2020-11-21) | |
100 | ------------------ | |
101 | ||
102 | - No longer use a universal wheel | |
103 | ||
104 | ||
105 | 1.4.2 (2020-11-21) | |
106 | ------------------ | |
107 | ||
108 | - Fix exception raised when calling `PyQuery("<textarea></textarea>").text()` | |
109 | ||
110 | - python2 is no longer supported | |
111 | ||
112 | 1.4.1 (2019-10-26) | |
113 | ------------------ | |
114 | ||
115 | - This is the latest release with py2 support | |
116 | ||
117 | - Remove py33, py34 support | |
118 | ||
119 | - web scraping improvements: default timeout and session support | |
120 | ||
121 | - Add API methods to serialize form-related elements according to spec | |
122 | ||
123 | - Include HTML markup when querying textarea text/value | |
124 | ||
125 | ||
126 | 1.4.0 (2018-01-11) | |
127 | ------------------ | |
128 | ||
129 | - Refactoring of `.text()` to match firefox behavior. | |
130 | ||
131 | ||
132 | 1.3.0 (2017-10-21) | |
133 | ------------------ | |
134 | ||
135 | - Remove some unmaintained modules: ``pyquery.ajax`` and ``pyquery.rules`` | |
136 | ||
137 | - Code cleanup. No longer use ugly hacks required by python2.6/python3.2. | |
138 | ||
139 | - Run tests with python3.6 on CI | |
140 | ||
141 | - Add a ``method`` argument to ``.outer_html()`` | |
142 | ||
143 | ||
144 | 1.2.17 (2016-10-14) | |
145 | ------------------- | |
146 | ||
147 | - ``PyQuery('<input value="">').val()`` is ``''`` | |
148 | - ``PyQuery('<input>').val()`` is ``''`` | |
149 | ||
150 | ||
151 | 1.2.16 (2016-10-14) | |
152 | ------------------- | |
153 | ||
154 | - ``.attr('value', '')`` no longer removes the ``value`` attribute | |
155 | ||
156 | - ``<input type="checkbox">`` without ``value="..."`` have a ``.val()`` of | |
157 | ``'on'`` | |
158 | ||
159 | - ``<input type="radio">`` without ``value="..."`` have a ``.val()`` of | |
160 | ``'on'`` | |
161 | ||
162 | - ``<select>`` without ``<option selected>`` have the value of their first | |
163 | ``<option>`` (or ``None`` if there are no options) | |
164 | ||
165 | ||
166 | 1.2.15 (2016-10-11) | |
167 | ------------------- | |
168 | ||
169 | - .val() should never raise | |
170 | ||
171 | - drop py26 support | |
172 | ||
173 | - improve .extend() by returning self | |
174 | ||
175 | ||
176 | 1.2.14 (2016-10-10) | |
177 | ------------------- | |
178 | ||
179 | - fix val() for <textarea> and <select>, to match jQuery behavior | |
180 | ||
181 | ||
182 | 1.2.13 (2016-04-12) | |
183 | ------------------- | |
184 | ||
185 | - Note explicit support for Python 3.5 | |
186 | ||
187 | 1.2.12 (2016-04-12) | |
188 | ------------------- | |
189 | ||
190 | - make_links_absolute now takes care of whitespace | |
191 | ||
192 | - added pseudo selector :has() | |
193 | ||
194 | - add cookies arguments as allowed arguments for requests | |
195 | ||
196 | ||
197 | 1.2.11 (2016-02-02) | |
198 | ------------------- | |
199 | ||
200 | - Preserve namespaces attribute on PyQuery copies. | |
201 | ||
202 | - Do not raise an error when the http response code is 2XX | |
203 | ||
204 | 1.2.10 (2016-01-05) | |
205 | ------------------- | |
206 | ||
207 | - Fixed #118: implemented usage ``lxml.etree.tostring`` within ``outer_html`` method | |
208 | ||
209 | - Fixed #117: Raise HTTP Error if HTTP status code is not equal to 200 | |
210 | ||
211 | - Fixed #112: make_links_absolute does not apply to form actions | |
212 | ||
213 | - Fixed #98: contains acts like jQuery | |
214 | ||
215 | ||
75 | 216 | 1.2.9 (2014-08-22) |
76 | 217 | ------------------ |
77 | 218 | |
114 | 255 | 1.2.6 (2013-10-11) |
115 | 256 | ------------------ |
116 | 257 | |
117 | README_fixt.py was not include in the release. Fix #54. | |
258 | - README_fixt.py was not included in the release. Fix #54. | |
118 | 259 | |
119 | 260 | |
120 | 261 | 1.2.5 (2013-10-10) |
121 | 262 | ------------------ |
122 | 263 | |
123 | cssselect compat. See https://github.com/SimonSapin/cssselect/pull/22 | |
124 | ||
125 | tests improvments. no longer require a eth connection. | |
126 | ||
127 | fix #55 | |
264 | - cssselect compat. See https://github.com/SimonSapin/cssselect/pull/22 | |
265 | ||
266 | - tests improvements. no longer require an eth connection. | |
267 | ||
268 | - fix #55 | |
128 | 269 | |
129 | 270 | 1.2.4 |
130 | 271 | ----- |
131 | 272 | |
132 | Moved to github. So a few files are renamed from .txt to .rst | |
133 | ||
134 | Added .xhtml_to_html() and .remove_namespaces() | |
135 | ||
136 | Use requests to fetch urls (if available) | |
137 | ||
138 | Use restkit's proxy instead of Paste (which will die with py3) | |
139 | ||
140 | Allow to open https urls | |
141 | ||
142 | python2.5 is no longer supported (may work, but tests are broken) | |
273 | - Moved to github. So a few files are renamed from .txt to .rst | |
274 | ||
275 | - Added .xhtml_to_html() and .remove_namespaces() | |
276 | ||
277 | - Use requests to fetch urls (if available) | |
278 | ||
279 | - Use restkit's proxy instead of Paste (which will die with py3) | |
280 | ||
281 | - Allow to open https urls | |
282 | ||
283 | - python2.5 is no longer supported (may work, but tests are broken) | |
143 | 284 | |
144 | 285 | 1.2.3 |
145 | 286 | ----- |
146 | 287 | |
147 | Allow to pass this in .filter() callback | |
148 | ||
149 | Add .contents() .items() | |
150 | ||
151 | Add tox.ini | |
152 | ||
153 | Bug fixes: fix #35 #55 #64 #66 | |
288 | - Allow to pass this in .filter() callback | |
289 | ||
290 | - Add .contents() .items() | |
291 | ||
292 | - Add tox.ini | |
293 | ||
294 | - Bug fixes: fix #35 #55 #64 #66 | |
154 | 295 | |
155 | 296 | 1.2.2 |
156 | 297 | ----- |
157 | 298 | |
158 | Fix cssselectpatch to match the newer implementation of cssselect. Fixes issue #62, #52 and #59 (Haoyu Bai) | |
159 | ||
160 | Fix issue #37 (Caleb Burns) | |
299 | - Fix cssselectpatch to match the newer implementation of cssselect. Fixes issue #62, #52 and #59 (Haoyu Bai) | |
300 | ||
301 | - Fix issue #37 (Caleb Burns) | |
161 | 302 | |
162 | 303 | 1.2.1 |
163 | 304 | ----- |
164 | 305 | |
165 | Allow to use a custom css translator. | |
166 | ||
167 | Fix issue 44: case problem with xml documents | |
306 | - Allow to use a custom css translator. | |
307 | ||
308 | - Fix issue 44: case problem with xml documents | |
168 | 309 | |
169 | 310 | 1.2 |
170 | 311 | --- |
171 | 312 | |
172 | PyQuery now use `cssselect <http://pypi.python.org/pypi/cssselect>`_. See issue | |
173 | 43. | |
174 | ||
175 | Fix issue 40: forward .html() extra arguments to ``lxml.etree.tostring`` | |
313 | - PyQuery now uses `cssselect <http://pypi.python.org/pypi/cssselect>`_. See issue 43. | |
314 | ||
315 | - Fix issue 40: forward .html() extra arguments to ``lxml.etree.tostring`` | |
176 | 316 | |
177 | 317 | 1.1.1 |
178 | 318 | ----- |
179 | 319 | |
180 | Minor release. Include test file so you can run tests from the tarball. | |
320 | - Minor release. Include test file so you can run tests from the tarball. | |
181 | 321 | |
182 | 322 | |
183 | 323 | 1.1 |
184 | 324 | --- |
185 | 325 | |
186 | fix issues 30, 31, 32 - py3 improvements / webob 1.2+ support | |
326 | - fix issues 30, 31, 32 - py3 improvements / webob 1.2+ support | |
187 | 327 | |
188 | 328 | |
189 | 329 | 1.0 |
190 | 330 | --- |
191 | 331 | |
192 | fix issues 24 | |
332 | - fix issues 24 | |
193 | 333 | |
194 | 334 | 0.7 |
195 | 335 | --- |
196 | 336 | |
197 | Python 3 compatible | |
198 | ||
199 | Add __unicode__ method | |
200 | ||
201 | Add root and encoding attribute | |
202 | ||
203 | fix issues 19, 20, 22, 23 | |
337 | - Python 3 compatible | |
338 | ||
339 | - Add __unicode__ method | |
340 | ||
341 | - Add root and encoding attribute | |
342 | ||
343 | - fix issues 19, 20, 22, 23 | |
204 | 344 | |
205 | 345 | 0.6.1 |
206 | 346 | ------ |
207 | 347 | |
208 | Move README.txt at package root | |
209 | ||
210 | Add CHANGES.txt and add it to long_description | |
348 | - Move README.txt at package root | |
349 | ||
350 | - Add CHANGES.txt and add it to long_description | |
211 | 351 | |
212 | 352 | 0.6 |
213 | 353 | ---- |
214 | 354 | |
215 | Added PyQuery.outerHtml | |
216 | ||
217 | Added PyQuery.fn | |
218 | ||
219 | Added PyQuery.map | |
220 | ||
221 | Change PyQuery.each behavior to reflect jQuery api | |
355 | - Added PyQuery.outerHtml | |
356 | ||
357 | - Added PyQuery.fn | |
358 | ||
359 | - Added PyQuery.map | |
360 | ||
361 | - Change PyQuery.each behavior to reflect jQuery api | |
222 | 362 | |
223 | 363 | |
224 | 364 | |
228 | 368 | Platform: UNKNOWN |
229 | 369 | Classifier: Intended Audience :: Developers |
230 | 370 | Classifier: Development Status :: 5 - Production/Stable |
231 | Classifier: Programming Language :: Python :: 2 | |
232 | Classifier: Programming Language :: Python :: 2.6 | |
233 | Classifier: Programming Language :: Python :: 2.7 | |
234 | 371 | Classifier: Programming Language :: Python :: 3 |
235 | Classifier: Programming Language :: Python :: 3.3 | |
236 | Classifier: Programming Language :: Python :: 3.4 | |
372 | Classifier: Programming Language :: Python :: 3.5 | |
373 | Classifier: Programming Language :: Python :: 3.6 | |
374 | Classifier: Programming Language :: Python :: 3.7 | |
375 | Provides-Extra: test |
0 | 0 | pyquery: a jquery-like library for python |
1 | 1 | ========================================= |
2 | ||
3 | .. image:: https://travis-ci.org/gawel/pyquery.svg | |
4 | :alt: Build Status | |
5 | :target: https://travis-ci.org/gawel/pyquery | |
2 | 6 | |
3 | 7 | pyquery allows you to make jquery queries on xml documents. |
4 | 8 | The API is as much as possible the similar to jquery. pyquery uses lxml for fast |
10 | 14 | |
11 | 15 | The `project`_ is being actively developed on a git repository on Github. I
12 | 16 | have the policy of giving push access to anyone who wants it and then to review |
13 | what he does. So if you want to contribute just email me. | |
17 | what they do. So if you want to contribute just email me. | |
14 | 18 | |
15 | 19 | Please report bugs on the `github |
16 | 20 | <https://github.com/gawel/pyquery/issues>`_ issue |
18 | 22 | |
19 | 23 | .. _deliverance: http://www.gawel.org/weblog/en/2008/12/skinning-with-pyquery-and-deliverance |
20 | 24 | .. _project: https://github.com/gawel/pyquery/ |
25 | ||
26 | I've spent hours maintaining this software, with love. | |
27 | Please consider tipping if you like it: | |
28 | ||
29 | BTC: 1PruQAwByDndFZ7vTeJhyWefAghaZx9RZg | |
30 | ||
31 | ETH: 0xb6418036d8E06c60C4D91c17d72Df6e1e5b15CE6 | |
32 | ||
33 | LTC: LY6CdZcDbxnBX9GFBJ45TqVj8NykBBqsmT | |
34 | ||
35 | .. | |
36 | >>> (urlopen, your_url, path_to_html_file) = getfixture('readme_fixt') | |
21 | 37 | |
22 | 38 | Quickstart |
23 | 39 | ========== |
0 | [buildout] | |
1 | newest = false | |
2 | parts = py2 docs | |
3 | develop = . | |
4 | ||
5 | [py3] | |
6 | recipe = zc.recipe.egg | |
7 | eggs = | |
8 | cssselect>0.7.9 | |
9 | WebOb>1.1.9 | |
10 | WebTest | |
11 | pyquery | |
12 | nose | |
13 | coverage | |
14 | ||
15 | [py2] | |
16 | recipe = zc.recipe.egg | |
17 | eggs = | |
18 | ${py3:eggs} | |
19 | unittest2 | |
20 | BeautifulSoup | |
21 | restkit | |
22 | ||
23 | ||
24 | [docs] | |
25 | recipe = zc.recipe.egg | |
26 | eggs = | |
27 | ${py2:eggs} | |
28 | Pygments | |
29 | Sphinx | |
30 | sphinx-pypi-upload | |
31 | interpreter = py | |
32 | scripts = | |
33 | sphinx-build | |
34 | ||
35 | [tox] | |
36 | recipe = gp.recipe.tox |
0 | import os | |
1 | import pytest | |
2 | from webtest import http | |
3 | from webtest.debugapp import debug_app | |
4 | from urllib.request import urlopen | |
5 | ||
6 | ||
7 | @pytest.fixture | |
8 | def readme_fixt(): | |
9 | server = http.StopableWSGIServer.create(debug_app) | |
10 | server.wait() | |
11 | path_to_html_file = os.path.join('tests', 'test.html') | |
12 | yield ( | |
13 | urlopen, | |
14 | server.application_url, | |
15 | path_to_html_file, | |
16 | ) | |
17 | server.shutdown() |
0 | pyquery (1.2.9-5) UNRELEASED; urgency=medium | |
0 | pyquery (1.4.3+git20210508.1.7096042-1) UNRELEASED; urgency=medium | |
1 | 1 | |
2 | 2 | [ Debian Janitor ] |
3 | 3 | * Bump debhelper from old 9 to 12. |
10 | 10 | * d/control: Update Vcs-* fields with new Debian Python Team Salsa |
11 | 11 | layout. |
12 | 12 | |
13 | -- Debian Janitor <janitor@jelmer.uk> Mon, 13 Apr 2020 21:25:33 +0000 | |
13 | [ Debian Janitor ] | |
14 | * New upstream snapshot. | |
15 | ||
16 | -- Debian Janitor <janitor@jelmer.uk> Tue, 08 Jun 2021 09:43:21 -0000 | |
14 | 17 | |
15 | 18 | pyquery (1.2.9-4) unstable; urgency=medium |
16 | 19 |
0 | ============================================= | |
1 | :mod:`pyquery.ajax` -- PyQuery AJAX extension | |
2 | ============================================= | |
3 | ||
4 | .. automodule:: pyquery.ajax | |
5 | ||
6 | ||
7 | .. fake imports | |
8 | ||
9 | >>> from pyquery.ajax import PyQuery as pq | |
10 | ||
11 | You can query some wsgi app if `WebOb`_ is installed (it's not a pyquery | |
12 | dependency). In this example the test app returns a simple input at `/` and a | |
13 | submit button at `/submit`:: | |
14 | ||
15 | >>> d = pq('<form></form>', app=input_app) | |
16 | >>> d.append(d.get('/')) | |
17 | [<form>] | |
18 | >>> print(d) | |
19 | <form><input name="youyou" type="text" value=""/></form> | |
20 | ||
21 | The app is also available in new nodes:: | |
22 | ||
23 | >>> d.get('/').app is d.app is d('form').app | |
24 | True | |
25 | ||
26 | You can also request another path:: | |
27 | ||
28 | >>> d.append(d.get('/submit')) | |
29 | [<form>] | |
30 | >>> print(d) | |
31 | <form><input name="youyou" type="text" value=""/><input type="submit" value="OK"/></form> | |
32 | ||
33 | If `restkit`_ is installed, you are able to get url directly with a `HostProxy`_ app:: | |
34 | ||
35 | >>> a = d.get(your_url) | |
36 | >>> a | |
37 | [<html>] | |
38 | ||
39 | You can retrieve the app response:: | |
40 | ||
41 | >>> print(a.response.status) | |
42 | 200 OK | |
43 | ||
44 | The response attribute is a `WebOb`_ `Response`_ | |
45 | ||
46 | .. _webob: http://pythonpaste.org/webob/ | |
47 | .. _response: http://pythonpaste.org/webob/#response | |
48 | .. _restkit: http://benoitc.github.com/restkit/ | |
49 | .. _hostproxy: http://benoitc.github.com/restkit/wsgi_proxy.html | |
50 | ||
51 | Api | |
52 | --- | |
53 | ||
54 | .. autoclass:: PyQuery | |
55 | :members: | |
56 |
0 | # -*- coding: utf-8 -*- | |
1 | import os | |
2 | import sys | |
3 | sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) | |
4 | from webtest import http | |
5 | from doctest import SKIP | |
6 | from tests.apps import input_app | |
7 | ||
8 | PY3 = sys.version_info >= (3,) | |
9 | ||
10 | ||
11 | def setup_test(test): | |
12 | for example in test.examples: | |
13 | # urlopen as moved in py3 | |
14 | if PY3: | |
15 | example.options.setdefault(SKIP, 1) | |
16 | if not PY3: | |
17 | server = http.StopableWSGIServer.create(input_app) | |
18 | server.wait() | |
19 | path_to_html_file = os.path.join('tests', 'test.html') | |
20 | test.globs.update( | |
21 | input_app=input_app, | |
22 | server=server, | |
23 | your_url=server.application_url.rstrip('/') + '/html', | |
24 | path_to_html_file=path_to_html_file, | |
25 | ) | |
26 | setup_test.__test__ = False | |
27 | ||
28 | ||
29 | def teardown_test(test): | |
30 | if 'server' in test.globs: | |
31 | test.globs['server'].shutdown() | |
32 | teardown_test.__test__ = False |
2 | 2 | |
3 | 3 | .. |
4 | 4 | >>> from pyquery import PyQuery as pq |
5 | ||
6 | Using attribute to select specific tag | |
7 | In attribute selectors, the value should be a valid CSS identifier or quoted as string:: | |
8 | ||
9 | >>> d = pq("<option value='1'><option value='2'>") | |
10 | >>> d('option[value="1"]') | |
11 | [<option>] | |
12 | ||
5 | 13 | |
6 | 14 | You can play with the attributes with the jquery API:: |
7 | 15 |
40 | 40 | |
41 | 41 | # General information about the project. |
42 | 42 | project = u'pyquery' |
43 | copyright = u'2012, Olivier Lauzanne' | |
43 | copyright = u'2012-2017, Olivier Lauzanne' | |
44 | 44 | |
45 | 45 | # The version info for the project you're documenting, acts as replacement for |
46 | 46 | # |version| and |release|, also used in various other places throughout the |
47 | 47 | # built documents. |
48 | 48 | # |
49 | 49 | # The short X.Y version. |
50 | version = '0.3' | |
50 | version = '1.3.x' | |
51 | 51 | # The full version, including alpha/beta/rc tags. |
52 | release = '0.3' | |
52 | release = '1.3.x' | |
53 | 53 | |
54 | 54 | # The language for content autogenerated by Sphinx. Refer to documentation |
55 | 55 | # for a list of supported languages. |
253 | 253 | break |
254 | 254 | del pkg_dir, setup, path |
255 | 255 | |
256 | from pyquery.cssselectpatch import JQueryTranslator | |
257 | ||
258 | with open('pseudo_classes.rst', 'w') as fd: | |
259 | fd.write('=========================\n') | |
260 | fd.write('Using pseudo classes\n') | |
261 | fd.write('=========================\n') | |
262 | for k in sorted(dir(JQueryTranslator)): | |
263 | if k.startswith('xpath_'): | |
264 | attr = getattr(JQueryTranslator, k) | |
265 | doc = getattr(attr, '__doc__', '') or '' | |
266 | doc = doc.strip() | |
267 | if doc.startswith('Common implementation'): | |
268 | continue | |
269 | k = k[6:] | |
270 | if '_' not in k or not doc: | |
271 | continue | |
272 | k, t = k.split('_', 1) | |
273 | if '_' in t: | |
274 | continue | |
275 | if t == 'function': | |
276 | k += '()' | |
277 | fd.write('\n\n:%s\n' % k) | |
278 | fd.write('==================\n\n') | |
279 | fd.write(doc.strip('..').replace(' ', ' ')) | |
256 | try: | |
257 | from pyquery.cssselectpatch import JQueryTranslator | |
258 | except ImportError: | |
259 | pass | |
260 | else: | |
261 | with open('pseudo_classes.rst', 'w') as fd: | |
262 | fd.write('=========================\n') | |
263 | fd.write('Using pseudo classes\n') | |
264 | fd.write('=========================\n') | |
265 | for k in sorted(dir(JQueryTranslator)): | |
266 | if k.startswith('xpath_'): | |
267 | attr = getattr(JQueryTranslator, k) | |
268 | doc = getattr(attr, '__doc__', '') or '' | |
269 | doc = doc.strip() | |
270 | if doc.startswith('Common implementation'): | |
271 | continue | |
272 | k = k[6:] | |
273 | if '_' not in k or not doc: | |
274 | continue | |
275 | k, t = k.split('_', 1) | |
276 | if '_' in t: | |
277 | continue | |
278 | if t == 'function': | |
279 | k += '()' | |
280 | fd.write('\n\n:%s\n' % k) | |
281 | fd.write('==================\n\n') | |
282 | fd.write(doc.strip('..').replace(' ', ' ')) |
0 | import os | |
1 | import sys | |
2 | import pytest | |
3 | from webtest import http | |
4 | from webtest.debugapp import debug_app | |
5 | ||
6 | ||
7 | @pytest.fixture | |
8 | def scrap_url(): | |
9 | sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) | |
10 | from tests.apps import input_app | |
11 | server = http.StopableWSGIServer.create(input_app) | |
12 | server.wait() | |
13 | yield server.application_url.rstrip('/') + '/html' | |
14 | server.shutdown() | |
15 | ||
16 | ||
17 | @pytest.fixture | |
18 | def tips_url(): | |
19 | server = http.StopableWSGIServer.create(debug_app) | |
20 | server.wait() | |
21 | yield server.application_url.rstrip('/') + '/form.html' | |
22 | server.shutdown() |
70 | 70 | >>> print(pq('<div>Yeah !</div>').addClass('myclass') + pq('<b>cool</b>')) |
71 | 71 | <div class="myclass">Yeah !</div><b>cool</b> |
72 | 72 | |
73 | Remove all namespaces:: | |
73 | 74 | |
75 | >>> d = pq('<foo xmlns="http://example.com/foo"></foo>') | |
76 | >>> d | |
77 | [<{http://example.com/foo}foo>] | |
78 | >>> d.remove_namespaces() | |
79 | [<foo>] | |
80 |
51 | 51 | |
52 | 52 | >>> from pyquery import PyQuery |
53 | 53 | >>> d = PyQuery('<div><h1/><h1 class="title">title</h1></div>') |
54 | >>> d(':contains("title")') | |
54 | >>> d('h1:contains("title")') | |
55 | 55 | [<h1.title>] |
56 | 56 | |
57 | 57 | |
81 | 81 | >>> from pyquery import PyQuery |
82 | 82 | >>> d = PyQuery('<div><h1><span>title</span></h1><h2/></div>') |
83 | 83 | >>> d(':empty') |
84 | [<span>, <h2>] | |
84 | [<h2>] | |
85 | 85 | |
86 | 86 | |
87 | 87 | |
159 | 159 | |
160 | 160 | |
161 | 161 | |
162 | :has() | |
163 | ================== | |
164 | ||
165 | Matches elements which contain at least one element that matches | |
166 | the specified selector. https://api.jquery.com/has-selector/ | |
167 | ||
168 | >>> from pyquery import PyQuery | |
169 | >>> d = PyQuery('<div class="foo"><div class="bar"></div></div>') | |
170 | >>> d('.foo:has(".baz")') | |
171 | [] | |
172 | >>> d('.foo:has(".foo")') | |
173 | [] | |
174 | >>> d('.foo:has(".bar")') | |
175 | [<div.foo>] | |
176 | >>> d('.foo:has(div)') | |
177 | [<div.foo>] | |
178 | ||
179 | ||
180 | ||
162 | 181 | :header |
163 | 182 | ================== |
164 | 183 | |
268 | 287 | |
269 | 288 | |
270 | 289 | |
290 | :pseudo | |
291 | ================== | |
292 | ||
293 | Translate a pseudo-element. | |
294 | ||
295 | Defaults to not supporting pseudo-elements at all, | |
296 | but can be overridden by sub-classes | |
297 | ||
271 | 298 | :radio |
272 | 299 | ================== |
273 | 300 |
1 | 1 | ========= |
2 | 2 | |
3 | 3 | .. |
4 | >>> from pyquery.ajax import PyQuery as pq | |
4 | >>> from pyquery import PyQuery as pq | |
5 | >>> your_url = getfixture('scrap_url') | |
5 | 6 | |
6 | 7 | PyQuery is able to load an html document from a url:: |
7 | 8 | |
18 | 19 | >>> pq(your_url, {'q': 'foo'}, method='post', verify=True) |
19 | 20 | [<html>] |
20 | 21 | |
22 | ||
23 | Timeout | |
24 | ------- | |
25 | ||
26 | The default timeout is 60 seconds; you can change it by setting the timeout parameter, which is forwarded to the underlying urllib or requests library. | |
27 | ||
28 | Session | |
29 | ------- | |
30 | ||
31 | When using the requests library you can instantiate a Session object which keeps state between http calls (for example, to keep cookies). You can set the session parameter to use this session object. | |
32 | ||
21 | 33 | .. _requests: http://docs.python-requests.org/en/latest/ |
0 | # -*- coding: utf-8 -*- | |
1 | import os | |
2 | import sys | |
3 | sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) | |
4 | from webtest import http | |
5 | from tests.apps import input_app | |
6 | ||
7 | ||
8 | def setup_test(test): | |
9 | server = http.StopableWSGIServer.create(input_app) | |
10 | server.wait() | |
11 | test.globs.update( | |
12 | server=server, | |
13 | your_url=server.application_url.rstrip('/') + '/html', | |
14 | ) | |
15 | setup_test.__test__ = False | |
16 | ||
17 | ||
18 | def teardown_test(test): | |
19 | test.globs['server'].shutdown() | |
20 | teardown_test.__test__ = False |
2 | 2 | |
3 | 3 | .. |
4 | 4 | >>> from pyquery import PyQuery as pq |
5 | >>> your_url = getfixture('tips_url') | |
5 | 6 | |
6 | 7 | Making links absolute |
7 | 8 | --------------------- |
8 | 9 | |
9 | You can make links absolute which can be usefull for screen scrapping:: | |
10 | You can make links absolute which can be useful for screen scraping:: | |
10 | 11 | |
11 | 12 | >>> d = pq(url=your_url, parser='html') |
12 | 13 | >>> d('form').attr('action') |
0 | # -*- coding: utf-8 -*- | |
1 | import os | |
2 | from webtest import http | |
3 | from webtest.debugapp import debug_app | |
4 | ||
5 | ||
6 | def setup_test(test): | |
7 | server = http.StopableWSGIServer.create(debug_app) | |
8 | server.wait() | |
9 | path_to_html_file = os.path.join('tests', 'test.html') | |
10 | test.globs.update( | |
11 | server=server, | |
12 | your_url=server.application_url.rstrip('/') + '/form.html', | |
13 | path_to_html_file=path_to_html_file, | |
14 | ) | |
15 | setup_test.__test__ = False | |
16 | ||
17 | ||
18 | def teardown_test(test): | |
19 | test.globs['server'].shutdown() | |
20 | teardown_test.__test__ = False |
33 | 33 | [<p#hello.hello>, <p#test>] |
34 | 34 | |
35 | 35 | |
36 | If you want to select a dotted id you need to escape the dot:: | |
37 | ||
38 | >>> d = pq('<p id="hello.you"><a/></p><p id="test"><a/></p>') | |
39 | >>> d(r'#hello\.you') | |
40 | [<p#hello.you>] | |
41 |
0 | #-*- coding:utf-8 -*- | |
1 | # | |
2 | 0 | # Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com> |
3 | 1 | # |
4 | 2 | # Distributed under the BSD license, see LICENSE.txt |
5 | 3 | |
6 | try: | |
7 | import webob | |
8 | import restkit | |
9 | except ImportError: | |
10 | from .pyquery import PyQuery | |
11 | else: | |
12 | from .ajax import PyQuery | |
13 | ||
4 | from .pyquery import PyQuery # NOQA |
0 | # -*- coding: utf-8 -*- | |
1 | from .pyquery import PyQuery as Base | |
2 | from .pyquery import no_default | |
3 | ||
4 | from webob import Request | |
5 | from webob import Response | |
6 | ||
7 | try: | |
8 | from restkit.contrib.wsgi_proxy import HostProxy | |
9 | except ImportError: | |
10 | HostProxy = no_default # NOQA | |
11 | ||
12 | ||
13 | class PyQuery(Base): | |
14 | ||
15 | def __init__(self, *args, **kwargs): | |
16 | if 'response' in kwargs: | |
17 | self.response = kwargs.pop('response') | |
18 | else: | |
19 | self.response = Response() | |
20 | if 'app' in kwargs: | |
21 | self.app = kwargs.pop('app') | |
22 | if len(args) == 0: | |
23 | args = [[]] | |
24 | else: | |
25 | self.app = no_default | |
26 | Base.__init__(self, *args, **kwargs) | |
27 | if self._parent is not no_default: | |
28 | self.app = self._parent.app | |
29 | ||
30 | def _wsgi_get(self, path_info, **kwargs): | |
31 | if path_info.startswith('/'): | |
32 | if 'app' in kwargs: | |
33 | app = kwargs.pop('app') | |
34 | elif self.app is not no_default: | |
35 | app = self.app | |
36 | else: | |
37 | raise ValueError('There is no app available') | |
38 | else: | |
39 | if HostProxy is not no_default: | |
40 | app = HostProxy(path_info) | |
41 | path_info = '/' | |
42 | else: | |
43 | raise ImportError('restkit is not installed') | |
44 | ||
45 | environ = kwargs.pop('environ').copy() | |
46 | environ.update(kwargs) | |
47 | ||
48 | # unsuported (came from Deliverance) | |
49 | for key in ['HTTP_ACCEPT_ENCODING', 'HTTP_IF_MATCH', | |
50 | 'HTTP_IF_UNMODIFIED_SINCE', 'HTTP_RANGE', 'HTTP_IF_RANGE']: | |
51 | if key in environ: | |
52 | del environ[key] | |
53 | ||
54 | req = Request.blank(path_info) | |
55 | req.environ.update(environ) | |
56 | resp = req.get_response(app) | |
57 | status = resp.status.split() | |
58 | ctype = resp.content_type.split(';')[0] | |
59 | if status[0] not in '45' and ctype == 'text/html': | |
60 | body = resp.body | |
61 | else: | |
62 | body = [] | |
63 | result = self.__class__(body, | |
64 | parent=self._parent, | |
65 | app=self.app, # always return self.app | |
66 | response=resp) | |
67 | return result | |
68 | ||
69 | def get(self, path_info, **kwargs): | |
70 | """GET a path from wsgi app or url | |
71 | """ | |
72 | environ = kwargs.setdefault('environ', {}) | |
73 | environ['REQUEST_METHOD'] = 'GET' | |
74 | environ['CONTENT_LENGTH'] = '0' | |
75 | return self._wsgi_get(path_info, **kwargs) | |
76 | ||
77 | def post(self, path_info, **kwargs): | |
78 | """POST a path from wsgi app or url | |
79 | """ | |
80 | environ = kwargs.setdefault('environ', {}) | |
81 | environ['REQUEST_METHOD'] = 'POST' | |
82 | return self._wsgi_get(path_info, **kwargs) |
0 | #-*- coding:utf-8 -*- | |
1 | # | |
2 | 0 | # Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com> |
3 | 1 | # |
4 | 2 | # Distributed under the BSD license, see LICENSE.txt |
126 | 124 | xpath.add_condition("@selected and name(.) = 'option'") |
127 | 125 | return xpath |
128 | 126 | |
127 | def _format_disabled_xpath(self, disabled=True): | |
128 | """Format XPath condition for :disabled or :enabled pseudo-classes | |
129 | according to the WHATWG spec. See: https://html.spec.whatwg.org | |
130 | /multipage/semantics-other.html#concept-element-disabled | |
131 | """ | |
132 | bool_op = '' if disabled else 'not' | |
133 | return '''( | |
134 | ((name(.) = 'button' or name(.) = 'input' or name(.) = 'select' | |
135 | or name(.) = 'textarea' or name(.) = 'fieldset') | |
136 | and %s(@disabled or (ancestor::fieldset[@disabled] | |
137 | and not(ancestor::legend[not(preceding-sibling::legend)]))) | |
138 | ) | |
139 | or | |
140 | ((name(.) = 'option' | |
141 | and %s(@disabled or ancestor::optgroup[@disabled])) | |
142 | ) | |
143 | or | |
144 | ((name(.) = 'optgroup' and %s(@disabled))) | |
145 | )''' % (bool_op, bool_op, bool_op) | |
146 | ||
129 | 147 | def xpath_disabled_pseudo(self, xpath): |
130 | 148 | """Matches all elements that are disabled:: |
131 | 149 | |
136 | 154 | |
137 | 155 | .. |
138 | 156 | """ |
139 | xpath.add_condition("@disabled") | |
157 | xpath.add_condition(self._format_disabled_xpath()) | |
140 | 158 | return xpath |
141 | 159 | |
142 | 160 | def xpath_enabled_pseudo(self, xpath): |
149 | 167 | |
150 | 168 | .. |
151 | 169 | """ |
152 | xpath.add_condition("not(@disabled) and name(.) = 'input'") | |
170 | xpath.add_condition(self._format_disabled_xpath(disabled=False)) | |
153 | 171 | return xpath |
154 | 172 | |
155 | 173 | def xpath_file_pseudo(self, xpath): |
336 | 354 | >>> from pyquery import PyQuery |
337 | 355 | >>> d = PyQuery('<div><h1><span>title</span></h1><h2/></div>') |
338 | 356 | >>> d(':empty') |
339 | [<span>, <h2>] | |
340 | ||
341 | .. | |
342 | """ | |
343 | xpath.add_condition("count(child::*) = 0") | |
357 | [<h2>] | |
358 | ||
359 | .. | |
360 | """ | |
361 | xpath.add_condition("not(node())") | |
344 | 362 | return xpath |
345 | 363 | |
346 | 364 | def xpath_eq_function(self, xpath, function): |
405 | 423 | |
406 | 424 | >>> from pyquery import PyQuery |
407 | 425 | >>> d = PyQuery('<div><h1/><h1 class="title">title</h1></div>') |
408 | >>> d(':contains("title")') | |
426 | >>> d('h1:contains("title")') | |
409 | 427 | [<h1.title>] |
410 | 428 | |
411 | 429 | .. |
412 | 430 | """ |
413 | if function.argument_types() != ['STRING']: | |
431 | if function.argument_types() not in (['STRING'], ['IDENT']): | |
414 | 432 | raise ExpressionError( |
415 | "Expected a single string for :contains(), got %r" % ( | |
433 | "Expected a single string or ident for :contains(), got %r" % ( | |
416 | 434 | function.arguments,)) |
417 | 435 | |
418 | 436 | value = self.xpath_literal(function.arguments[0].value) |
419 | xpath.add_post_condition("contains(text(), %s)" % value) | |
420 | return xpath | |
437 | xpath.add_post_condition('contains(., %s)' % value) | |
438 | return xpath | |
439 | ||
440 | def xpath_has_function(self, xpath, function): | |
441 | """Matches elements which contain at least one element that matches | |
442 | the specified selector. https://api.jquery.com/has-selector/ | |
443 | ||
444 | >>> from pyquery import PyQuery | |
445 | >>> d = PyQuery('<div class="foo"><div class="bar"></div></div>') | |
446 | >>> d('.foo:has(".baz")') | |
447 | [] | |
448 | >>> d('.foo:has(".foo")') | |
449 | [] | |
450 | >>> d('.foo:has(".bar")') | |
451 | [<div.foo>] | |
452 | >>> d('.foo:has(div)') | |
453 | [<div.foo>] | |
454 | ||
455 | .. | |
456 | """ | |
457 | if function.argument_types() not in (['STRING'], ['IDENT']): | |
458 | raise ExpressionError( | |
459 | "Expected a single string or ident for :has(), got %r" % ( | |
460 | function.arguments,)) | |
461 | value = self.css_to_xpath( | |
462 | function.arguments[0].value, prefix='descendant::', | |
463 | ) | |
464 | xpath.add_post_condition(value) | |
465 | return xpath |
0 | 0 | # -*- coding: utf-8 -*- |
1 | import sys | |
2 | ||
3 | PY3k = sys.version_info >= (3,) | |
4 | ||
5 | if PY3k: | |
6 | from urllib.request import urlopen | |
7 | from urllib.parse import urlencode | |
8 | basestring = (str, bytes) | |
9 | else: | |
10 | from urllib2 import urlopen # NOQA | |
11 | from urllib import urlencode # NOQA | |
1 | from urllib.request import urlopen | |
2 | from urllib.parse import urlencode | |
3 | from urllib.error import HTTPError | |
12 | 4 | |
13 | 5 | try: |
14 | 6 | import requests |
16 | 8 | except ImportError: |
17 | 9 | HAS_REQUEST = False |
18 | 10 | |
11 | DEFAULT_TIMEOUT = 60 | |
12 | ||
13 | basestring = (str, bytes) | |
19 | 14 | |
20 | 15 | allowed_args = ( |
21 | 'auth', 'data', 'headers', 'verify', 'cert', 'config', 'hooks', 'proxies') | |
16 | 'auth', 'data', 'headers', 'verify', | |
17 | 'cert', 'config', 'hooks', 'proxies', 'cookies' | |
18 | ) | |
22 | 19 | |
23 | 20 | |
24 | 21 | def _query(url, method, kwargs): |
37 | 34 | url += data |
38 | 35 | data = None |
39 | 36 | |
40 | if data and PY3k: | |
37 | if data: | |
41 | 38 | data = data.encode('utf-8') |
42 | 39 | return url, data |
43 | 40 | |
44 | 41 | |
45 | 42 | def _requests(url, kwargs): |
43 | ||
46 | 44 | encoding = kwargs.get('encoding') |
47 | 45 | method = kwargs.get('method', 'get').lower() |
48 | meth = getattr(requests, str(method)) | |
46 | session = kwargs.get('session') | |
47 | if session: | |
48 | meth = getattr(session, str(method)) | |
49 | else: | |
50 | meth = getattr(requests, str(method)) | |
49 | 51 | if method == 'get': |
50 | 52 | url, data = _query(url, method, kwargs) |
51 | 53 | kw = {} |
52 | 54 | for k in allowed_args: |
53 | 55 | if k in kwargs: |
54 | 56 | kw[k] = kwargs[k] |
55 | resp = meth(url=url, **kw) | |
57 | resp = meth(url=url, timeout=kwargs.get('timeout', DEFAULT_TIMEOUT), **kw) | |
58 | if not (200 <= resp.status_code < 300): | |
59 | raise HTTPError(resp.url, resp.status_code, | |
60 | resp.reason, resp.headers, None) | |
56 | 61 | if encoding: |
57 | 62 | resp.encoding = encoding |
58 | 63 | html = resp.text |
62 | 67 | def _urllib(url, kwargs): |
63 | 68 | method = kwargs.get('method') |
64 | 69 | url, data = _query(url, method, kwargs) |
65 | return urlopen(url, data) | |
70 | return urlopen(url, data, timeout=kwargs.get('timeout', DEFAULT_TIMEOUT)) | |
66 | 71 | |
67 | 72 | |
68 | 73 | def url_opener(url, kwargs): |
0 | #-*- coding:utf-8 -*- | |
1 | # | |
2 | 0 | # Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com> |
3 | 1 | # |
4 | 2 | # Distributed under the BSD license, see LICENSE.txt |
5 | 3 | from .cssselectpatch import JQueryTranslator |
4 | from collections import OrderedDict | |
5 | from urllib.parse import urlencode | |
6 | from urllib.parse import urljoin | |
6 | 7 | from .openers import url_opener |
8 | from .text import extract_text | |
7 | 9 | from copy import deepcopy |
8 | 10 | from lxml import etree |
9 | 11 | import lxml.html |
10 | 12 | import inspect |
13 | import itertools | |
11 | 14 | import types |
12 | import sys | |
13 | ||
14 | ||
15 | PY3k = sys.version_info >= (3,) | |
16 | ||
17 | if PY3k: | |
18 | from urllib.parse import urlencode | |
19 | from urllib.parse import urljoin | |
20 | basestring = (str, bytes) | |
21 | unicode = str | |
22 | else: | |
23 | from urllib import urlencode # NOQA | |
24 | from urlparse import urljoin # NOQA | |
25 | ||
26 | ||
27 | def func_globals(f): | |
28 | return f.__globals__ if PY3k else f.func_globals | |
29 | ||
30 | ||
31 | def func_code(f): | |
32 | return f.__code__ if PY3k else f.func_code | |
15 | ||
16 | basestring = (str, bytes) | |
17 | ||
18 | ||
19 | def getargspec(func): | |
20 | args = inspect.signature(func).parameters.values() | |
21 | return [p.name for p in args | |
22 | if p.kind == p.POSITIONAL_OR_KEYWORD] | |
33 | 23 | |
34 | 24 | |
35 | 25 | def with_camel_case_alias(func): |
36 | 26 | """decorator for methods who required a camelcase alias""" |
37 | 27 | _camel_case_aliases.add(func.__name__) |
38 | 28 | return func |
29 | ||
30 | ||
39 | 31 | _camel_case_aliases = set() |
40 | 32 | |
41 | 33 | |
45 | 37 | parts = list(alias.split('_')) |
46 | 38 | name = parts[0] + ''.join([p.title() for p in parts[1:]]) |
47 | 39 | func = getattr(PyQuery, alias) |
48 | f = types.FunctionType(func_code(func), func_globals(func), | |
49 | name, inspect.getargspec(func).defaults) | |
40 | f = types.FunctionType(func.__code__, func.__globals__, | |
41 | name, func.__defaults__) | |
50 | 42 | f.__doc__ = ( |
51 | 43 | 'Alias for :func:`~pyquery.pyquery.PyQuery.%s`') % func.__name__ |
52 | 44 | setattr(PyQuery, name, f.__get__(None, PyQuery)) |
98 | 90 | |
99 | 91 | |
100 | 92 | def callback(func, *args): |
101 | return func(*args[:func_code(func).co_argcount]) | |
93 | return func(*args[:func.__code__.co_argcount]) | |
102 | 94 | |
103 | 95 | |
104 | 96 | class NoDefault(object): |
105 | 97 | def __repr__(self): |
106 | 98 | """clean representation in Sphinx""" |
107 | 99 | return '<NoDefault>' |
100 | ||
108 | 101 | |
109 | 102 | no_default = NoDefault() |
110 | 103 | del NoDefault |
156 | 149 | self.parser = kwargs.pop('parser', None) |
157 | 150 | |
158 | 151 | if (len(args) >= 1 and |
159 | (not PY3k and isinstance(args[0], basestring) or | |
160 | (PY3k and isinstance(args[0], str))) and | |
152 | isinstance(args[0], str) and | |
161 | 153 | args[0].split('://', 1)[0] in ('http', 'https')): |
162 | 154 | kwargs['url'] = args[0] |
163 | 155 | if len(args) >= 2: |
178 | 170 | else: |
179 | 171 | self._translator = self._translator_class(xhtml=False) |
180 | 172 | |
181 | namespaces = kwargs.pop('namespaces', {}) | |
173 | self.namespaces = kwargs.pop('namespaces', None) | |
182 | 174 | |
183 | 175 | if kwargs: |
184 | 176 | # specific case to get the dom |
202 | 194 | if hasattr(html, 'close'): |
203 | 195 | try: |
204 | 196 | html.close() |
205 | except: | |
197 | except Exception: | |
206 | 198 | pass |
207 | 199 | |
208 | 200 | else: |
232 | 224 | elements = context |
233 | 225 | elif isinstance(context, etree._Element): |
234 | 226 | elements = [context] |
227 | else: | |
228 | raise TypeError(context) | |
235 | 229 | |
236 | 230 | # select nodes |
237 | 231 | if elements and selector is not no_default: |
238 | 232 | xpath = self._css_to_xpath(selector) |
239 | 233 | results = [] |
240 | 234 | for tag in elements: |
241 | results.extend(tag.xpath(xpath, namespaces=namespaces)) | |
235 | results.extend( | |
236 | tag.xpath(xpath, namespaces=self.namespaces)) | |
242 | 237 | elements = results |
243 | 238 | |
244 | 239 | list.__init__(self, elements) |
246 | 241 | def _css_to_xpath(self, selector, prefix='descendant-or-self::'): |
247 | 242 | selector = selector.replace('[@', '[') |
248 | 243 | return self._translator.css_to_xpath(selector, prefix) |
244 | ||
245 | def _copy(self, *args, **kwargs): | |
246 | kwargs.setdefault('namespaces', self.namespaces) | |
247 | return self.__class__(*args, **kwargs) | |
249 | 248 | |
250 | 249 | def __call__(self, *args, **kwargs): |
251 | 250 | """return a new PyQuery instance |
254 | 253 | if length == 0: |
255 | 254 | raise ValueError('You must provide at least a selector') |
256 | 255 | if args[0] == '': |
257 | return self.__class__([]) | |
256 | return self._copy([]) | |
258 | 257 | if (len(args) == 1 and |
259 | (not PY3k and isinstance(args[0], basestring) or | |
260 | (PY3k and isinstance(args[0], str))) and | |
258 | isinstance(args[0], str) and | |
261 | 259 | not args[0].startswith('<')): |
262 | 260 | args += (self,) |
263 | result = self.__class__(*args, parent=self, **kwargs) | |
261 | result = self._copy(*args, parent=self, **kwargs) | |
264 | 262 | return result |
265 | 263 | |
266 | 264 | # keep original list api prefixed with _ |
270 | 268 | # improve pythonic api |
271 | 269 | def __add__(self, other): |
272 | 270 | assert isinstance(other, self.__class__) |
273 | return self.__class__(self[:] + other[:]) | |
271 | return self._copy(self[:] + other[:]) | |
274 | 272 | |
275 | 273 | def extend(self, other): |
276 | 274 | """Extend with another PyQuery object"""
277 | 275 | assert isinstance(other, self.__class__) |
278 | 276 | self._extend(other[:]) |
277 | return self | |
279 | 278 | |
280 | 279 | def items(self, selector=None): |
281 | 280 | """Iter over elements. Return PyQuery objects: |
293 | 292 | else: |
294 | 293 | elems = self |
295 | 294 | for elem in elems: |
296 | yield self.__class__(elem, **dict(parent=self)) | |
295 | yield self._copy(elem, parent=self) | |
297 | 296 | |
298 | 297 | def xhtml_to_html(self): |
299 | 298 | """Remove xhtml namespace: |
341 | 340 | <script><![[CDATA[ ]></script> |
342 | 341 | |
343 | 342 | """ |
344 | if PY3k: | |
345 | return ''.join([etree.tostring(e, encoding=str) for e in self]) | |
346 | else: | |
347 | return ''.join([etree.tostring(e) for e in self]) | |
343 | return ''.join([etree.tostring(e, encoding=str) for e in self]) | |
348 | 344 | |
349 | 345 | def __unicode__(self): |
350 | 346 | """xml representation of current nodes""" |
351 | return unicode('').join([etree.tostring(e, encoding=unicode) | |
352 | for e in self]) | |
347 | return u''.join([etree.tostring(e, encoding=str) | |
348 | for e in self]) | |
353 | 349 | |
354 | 350 | def __html__(self): |
355 | 351 | """html representation of current nodes:: |
360 | 356 | <script><![[CDATA[ ]></script> |
361 | 357 | |
362 | 358 | """ |
363 | return unicode('').join([lxml.html.tostring(e, encoding=unicode) | |
364 | for e in self]) | |
359 | return u''.join([lxml.html.tostring(e, encoding=str) | |
360 | for e in self]) | |
365 | 361 | |
366 | 362 | def __repr__(self): |
367 | 363 | r = [] |
374 | 370 | r.append('<%s%s%s>' % (el.tag, id, c)) |
375 | 371 | return '[' + (', '.join(r)) + ']' |
376 | 372 | except AttributeError: |
377 | if PY3k: | |
378 | return list.__repr__(self) | |
379 | else: | |
380 | for el in self: | |
381 | if isinstance(el, unicode): | |
382 | r.append(el.encode('utf-8')) | |
383 | else: | |
384 | r.append(el) | |
385 | return repr(r) | |
373 | return list.__repr__(self) | |
386 | 374 | |
387 | 375 | @property |
388 | 376 | def root(self): |
389 | 377 | """return the xml root element |
390 | 378 | """ |
391 | 379 | if self._parent is not no_default: |
392 | return self._parent.getroottree() | |
380 | return self._parent[0].getroottree() | |
393 | 381 | return self[0].getroottree() |
394 | 382 | |
395 | 383 | @property |
414 | 402 | xpath = self._css_to_xpath(selector, 'self::') |
415 | 403 | results = [] |
416 | 404 | for tag in elements: |
417 | results.extend(tag.xpath(xpath)) | |
405 | results.extend(tag.xpath(xpath, namespaces=self.namespaces)) | |
418 | 406 | if reverse: |
419 | 407 | results.reverse() |
420 | 408 | if unique: |
421 | 409 | result_list = results |
422 | 410 | results = [] |
423 | 411 | for item in result_list: |
424 | if not item in results: | |
412 | if item not in results: | |
425 | 413 | results.append(item) |
426 | return self.__class__(results, **dict(parent=self)) | |
414 | return self._copy(results, parent=self) | |
427 | 415 | |
428 | 416 | def parent(self, selector=None): |
429 | 417 | return self._filter_only( |
474 | 462 | """ |
475 | 463 | return self._filter_only(selector, self._next_all()) |
476 | 464 | |
465 | @with_camel_case_alias | |
466 | def next_until(self, selector, filter_=None): | |
467 | """ | |
468 | >>> h = ''' | |
469 | ... <h2>Greeting 1</h2> | |
470 | ... <p>Hello!</p><p>World!</p> | |
471 | ... <h2>Greeting 2</h2><p>Bye!</p> | |
472 | ... ''' | |
473 | >>> d = PyQuery(h) | |
474 | >>> d('h2:first').nextUntil('h2') | |
475 | [<p>, <p>] | |
476 | """ | |
477 | return self._filter_only( | |
478 | filter_, [ | |
479 | e | |
480 | for q in itertools.takewhile( | |
481 | lambda q: not q.is_(selector), self.next_all().items()) | |
482 | for e in q | |
483 | ] | |
484 | ) | |
485 | ||
477 | 486 | def _prev_all(self): |
478 | 487 | return [e for e in self._traverse('getprevious')] |
479 | 488 | |
547 | 556 | result = [] |
548 | 557 | for current in self: |
549 | 558 | while (current is not None and |
550 | not self.__class__(current).is_(selector)): | |
559 | not self._copy(current).is_(selector)): | |
551 | 560 | current = current.getparent() |
552 | 561 | if current is not None: |
553 | 562 | result.append(current) |
554 | return self.__class__(result, **dict(parent=self)) | |
563 | return self._copy(result, parent=self) | |
555 | 564 | |
556 | 565 | def contents(self): |
557 | 566 | """ |
563 | 572 | """ |
564 | 573 | results = [] |
565 | 574 | for elem in self: |
566 | results.extend(elem.xpath('child::text()|child::*')) | |
567 | return self.__class__(results, **dict(parent=self)) | |
575 | results.extend(elem.xpath('child::text()|child::*', | |
576 | namespaces=self.namespaces)) | |
577 | return self._copy(results, parent=self) | |
568 | 578 | |
569 | 579 | def filter(self, selector): |
570 | 580 | """Filter elements in self using selector (string or function): |
585 | 595 | return self._filter_only(selector, self) |
586 | 596 | else: |
587 | 597 | elements = [] |
588 | args = inspect.getargspec(callback).args | |
598 | args = getargspec(callback) | |
589 | 599 | try: |
590 | 600 | for i, this in enumerate(self): |
591 | 601 | if len(args) == 1: |
592 | func_globals(selector)['this'] = this | |
602 | selector.__globals__['this'] = this | |
593 | 603 | if callback(selector, i, this): |
594 | 604 | elements.append(this) |
595 | 605 | finally: |
596 | f_globals = func_globals(selector) | |
606 | f_globals = selector.__globals__ | |
597 | 607 | if 'this' in f_globals: |
598 | 608 | del f_globals['this'] |
599 | return self.__class__(elements, **dict(parent=self)) | |
609 | return self._copy(elements, parent=self) | |
600 | 610 | |
601 | 611 | def not_(self, selector): |
602 | 612 | """Return elements that don't match the given selector: |
605 | 615 | >>> d('p').not_('.hello') |
606 | 616 | [<p>] |
607 | 617 | """ |
608 | exclude = set(self.__class__(selector, self)) | |
609 | return self.__class__([e for e in self if e not in exclude], | |
610 | **dict(parent=self)) | |
618 | exclude = set(self._copy(selector, self)) | |
619 | return self._copy([e for e in self if e not in exclude], | |
620 | parent=self) | |
611 | 621 | |
612 | 622 | def is_(self, selector): |
613 | 623 | """Returns True if selector matches at least one current element, else |
638 | 648 | [<em>] |
639 | 649 | """ |
640 | 650 | xpath = self._css_to_xpath(selector) |
641 | results = [child.xpath(xpath) for tag in self | |
651 | results = [child.xpath(xpath, namespaces=self.namespaces) | |
652 | for tag in self | |
642 | 653 | for child in tag.getchildren()] |
643 | 654 | # Flatten the results |
644 | 655 | elements = [] |
645 | 656 | for r in results: |
646 | 657 | elements.extend(r) |
647 | return self.__class__(elements, **dict(parent=self)) | |
658 | return self._copy(elements, parent=self) | |
648 | 659 | |
649 | 660 | def eq(self, index): |
650 | 661 | """Return PyQuery of only the element with the provided index:: |
659 | 670 | |
660 | 671 | .. |
661 | 672 | """ |
662 | # Use slicing to silently handle out of bounds indexes | |
663 | items = self[index:index + 1] | |
664 | return self.__class__(items, **dict(parent=self)) | |
673 | # Slicing will return empty list when index=-1 | |
674 | # we should handle out of bound by ourselves | |
675 | try: | |
676 | items = self[index] | |
677 | except IndexError: | |
678 | items = [] | |
679 | return self._copy(items, parent=self) | |
665 | 680 | |
666 | 681 | def each(self, func): |
667 | 682 | """apply func on each nodes |
668 | 683 | """ |
669 | 684 | try: |
670 | 685 | for i, element in enumerate(self): |
671 | func_globals(func)['this'] = element | |
686 | func.__globals__['this'] = element | |
672 | 687 | if callback(func, i, element) is False: |
673 | 688 | break |
674 | 689 | finally: |
675 | f_globals = func_globals(func) | |
690 | f_globals = func.__globals__ | |
676 | 691 | if 'this' in f_globals: |
677 | 692 | del f_globals['this'] |
678 | 693 | return self |
697 | 712 | items = [] |
698 | 713 | try: |
699 | 714 | for i, element in enumerate(self): |
700 | func_globals(func)['this'] = element | |
715 | func.__globals__['this'] = element | |
701 | 716 | result = callback(func, i, element) |
702 | 717 | if result is not None: |
703 | 718 | if not isinstance(result, list): |
705 | 720 | else: |
706 | 721 | items.extend(result) |
707 | 722 | finally: |
708 | f_globals = func_globals(func) | |
723 | f_globals = func.__globals__ | |
709 | 724 | if 'this' in f_globals: |
710 | 725 | del f_globals['this'] |
711 | return self.__class__(items, **dict(parent=self)) | |
726 | return self._copy(items, parent=self) | |
712 | 727 | |
713 | 728 | @property |
714 | 729 | def length(self): |
759 | 774 | tag.set(key, value) |
760 | 775 | elif value is no_default: |
761 | 776 | return self[0].get(attr) |
762 | elif value is None or value == '': | |
777 | elif value is None: | |
763 | 778 | return self.remove_attr(attr) |
764 | 779 | else: |
765 | 780 | for tag in self: |
923 | 938 | # CORE UI EFFECTS # |
924 | 939 | ################### |
925 | 940 | def hide(self): |
926 | """remove display:none to elements style | |
941 | """Add display:none to elements style: | |
927 | 942 | |
928 | 943 | >>> print(PyQuery('<div style="display:none;"/>').hide()) |
929 | 944 | <div style="display: none"/> |
932 | 947 | return self.css('display', 'none') |
933 | 948 | |
934 | 949 | def show(self): |
935 | """add display:block to elements style | |
950 | """Add display:block to elements style: | |
936 | 951 | |
937 | 952 | >>> print(PyQuery('<div />').show()) |
938 | 953 | <div style="display: block"/> |
955 | 970 | >>> d.val() |
956 | 971 | 'Youhou' |
957 | 972 | |
958 | """ | |
959 | return self.attr('value', value) or None | |
973 | Set the selected values for a `select` element with the `multiple` | |
974 | attribute:: | |
975 | ||
976 | >>> d = PyQuery(''' | |
977 | ... <select multiple> | |
978 | ... <option value="you"><option value="hou"> | |
979 | ... </select> | |
980 | ... ''') | |
981 | >>> d.val(['you', 'hou']) | |
982 | [<select>] | |
983 | ||
984 | Get the selected values for a `select` element with the `multiple` | |
985 | attribute:: | |
986 | ||
987 | >>> d.val() | |
988 | ['you', 'hou'] | |
989 | ||
990 | """ | |
991 | def _get_value(tag): | |
992 | # <textarea> | |
993 | if tag.tag == 'textarea': | |
994 | return self._copy(tag).html() | |
995 | # <select> | |
996 | elif tag.tag == 'select': | |
997 | if 'multiple' in tag.attrib: | |
998 | # Only extract value if selected | |
999 | selected = self._copy(tag)('option[selected]') | |
1000 | # Rebuild list to avoid serialization error | |
1001 | return list(selected.map( | |
1002 | lambda _, o: self._copy(o).attr('value') | |
1003 | )) | |
1004 | selected_option = self._copy(tag)('option[selected]:last') | |
1005 | if selected_option: | |
1006 | return selected_option.attr('value') | |
1007 | else: | |
1008 | return self._copy(tag)('option').attr('value') | |
1009 | # <input type="checkbox"> or <input type="radio"> | |
1010 | elif self.is_(':checkbox,:radio'): | |
1011 | val = self._copy(tag).attr('value') | |
1012 | if val is None: | |
1013 | return 'on' | |
1014 | else: | |
1015 | return val | |
1016 | # <input> | |
1017 | elif tag.tag == 'input': | |
1018 | val = self._copy(tag).attr('value') | |
1019 | return val.replace('\n', '') if val else '' | |
1020 | # everything else. | |
1021 | return self._copy(tag).attr('value') or '' | |
1022 | ||
1023 | def _set_value(pq, value): | |
1024 | for tag in pq: | |
1025 | # <select> | |
1026 | if tag.tag == 'select': | |
1027 | if not isinstance(value, list): | |
1028 | value = [value] | |
1029 | ||
1030 | def _make_option_selected(_, elem): | |
1031 | pq = self._copy(elem) | |
1032 | if pq.attr('value') in value: | |
1033 | pq.attr('selected', 'selected') | |
1034 | if 'multiple' not in tag.attrib: | |
1035 | del value[:] # Ensure it toggles first match | |
1036 | else: | |
1037 | pq.removeAttr('selected') | |
1038 | ||
1039 | self._copy(tag)('option').each(_make_option_selected) | |
1040 | continue | |
1041 | # Stringify array | |
1042 | if isinstance(value, list): | |
1043 | value = ','.join(value) | |
1044 | # <textarea> | |
1045 | if tag.tag == 'textarea': | |
1046 | self._copy(tag).text(value) | |
1047 | continue | |
1048 | # <input> and everything else. | |
1049 | self._copy(tag).attr('value', value) | |
1050 | ||
1051 | if value is no_default: | |
1052 | if len(self): | |
1053 | return _get_value(self[0]) | |
1054 | else: | |
1055 | _set_value(self, value) | |
1056 | return self | |
960 | 1057 | |
961 | 1058 | def html(self, value=no_default, **kwargs): |
962 | 1059 | """Get or set the html representation of sub nodes. |
988 | 1085 | tag = self[0] |
989 | 1086 | children = tag.getchildren() |
990 | 1087 | if not children: |
991 | return tag.text | |
1088 | return tag.text or '' | |
992 | 1089 | html = tag.text or '' |
993 | 1090 | if 'encoding' not in kwargs: |
994 | kwargs['encoding'] = unicode | |
995 | html += unicode('').join([etree.tostring(e, **kwargs) | |
996 | for e in children]) | |
1091 | kwargs['encoding'] = str | |
1092 | html += u''.join([etree.tostring(e, **kwargs) | |
1093 | for e in children]) | |
997 | 1094 | return html |
998 | 1095 | else: |
999 | 1096 | if isinstance(value, self.__class__): |
1000 | new_html = unicode(value) | |
1097 | new_html = str(value) | |
1001 | 1098 | elif isinstance(value, basestring): |
1002 | 1099 | new_html = value |
1003 | 1100 | elif not value: |
1009 | 1106 | for child in tag.getchildren(): |
1010 | 1107 | tag.remove(child) |
1011 | 1108 | root = fromstring( |
1012 | unicode('<root>') + new_html + unicode('</root>'), | |
1109 | u'<root>' + new_html + u'</root>', | |
1013 | 1110 | self.parser)[0] |
1014 | 1111 | children = root.getchildren() |
1015 | 1112 | if children: |
1016 | 1113 | tag.extend(children) |
1017 | 1114 | tag.text = root.text |
1018 | tag.tail = root.tail | |
1019 | return self | |
1020 | ||
1021 | @with_camel_case_alias | |
1022 | def outer_html(self): | |
1115 | return self | |
1116 | ||
1117 | @with_camel_case_alias | |
1118 | def outer_html(self, method="html"): | |
1023 | 1119 | """Get the html representation of the first selected element:: |
1024 | 1120 | |
1025 | 1121 | >>> d = PyQuery('<div><span class="red">toto</span> rocks</div>') |
1043 | 1139 | if e0.tail: |
1044 | 1140 | e0 = deepcopy(e0) |
1045 | 1141 | e0.tail = '' |
1046 | return lxml.html.tostring(e0, encoding=unicode) | |
1047 | ||
1048 | def text(self, value=no_default): | |
1142 | return etree.tostring(e0, encoding=str, method=method) | |
1143 | ||
1144 | def text(self, value=no_default, **kwargs): | |
1049 | 1145 | """Get or set the text representation of sub nodes. |
1050 | 1146 | |
1051 | 1147 | Get the text value:: |
1052 | 1148 | |
1053 | 1149 | >>> doc = PyQuery('<div><span>toto</span><span>tata</span></div>') |
1054 | 1150 | >>> print(doc.text()) |
1151 | tototata | |
1152 | >>> doc = PyQuery('''<div><span>toto</span> | |
1153 | ... <span>tata</span></div>''') | |
1154 | >>> print(doc.text()) | |
1055 | 1155 | toto tata |
1156 | ||
1157 | Get the text value, without squashing newlines:: | |
1158 | ||
1159 | >>> doc = PyQuery('''<div><span>toto</span> | |
1160 | ... <span>tata</span></div>''') | |
1161 | >>> print(doc.text(squash_space=False)) | |
1162 | toto | |
1163 | tata | |
1056 | 1164 | |
1057 | 1165 | Set the text value:: |
1058 | 1166 | |
1066 | 1174 | if value is no_default: |
1067 | 1175 | if not self: |
1068 | 1176 | return '' |
1069 | ||
1070 | text = [] | |
1071 | ||
1072 | def add_text(tag, no_tail=False): | |
1073 | if tag.text and not isinstance(tag, lxml.etree._Comment): | |
1074 | text.append(tag.text) | |
1075 | for child in tag.getchildren(): | |
1076 | add_text(child) | |
1077 | if not no_tail and tag.tail: | |
1078 | text.append(tag.tail) | |
1079 | ||
1080 | for tag in self: | |
1081 | add_text(tag, no_tail=True) | |
1082 | return ' '.join([t.strip() for t in text if t.strip()]) | |
1177 | return ' '.join( | |
1178 | self._copy(tag).html() if tag.tag == 'textarea' else | |
1179 | extract_text(tag, **kwargs) for tag in self | |
1180 | ) | |
1083 | 1181 | |
1084 | 1182 | for tag in self: |
1085 | 1183 | for child in tag.getchildren(): |
1093 | 1191 | |
1094 | 1192 | def _get_root(self, value): |
1095 | 1193 | if isinstance(value, basestring): |
1096 | root = fromstring(unicode('<root>') + value + unicode('</root>'), | |
1194 | root = fromstring(u'<root>' + value + u'</root>', | |
1097 | 1195 | self.parser)[0] |
1098 | 1196 | elif isinstance(value, etree._Element): |
1099 | root = self.__class__(value) | |
1197 | root = self._copy(value) | |
1100 | 1198 | elif isinstance(value, PyQuery): |
1101 | 1199 | root = value |
1102 | 1200 | else: |
1125 | 1223 | if i > 0: |
1126 | 1224 | root = deepcopy(list(root)) |
1127 | 1225 | tag.extend(root) |
1128 | root = tag[-len(root):] | |
1129 | 1226 | return self |
1130 | 1227 | |
1131 | 1228 | @with_camel_case_alias |
1299 | 1396 | |
1300 | 1397 | @with_camel_case_alias |
1301 | 1398 | def replace_with(self, value): |
1302 | """replace nodes by value:: | |
1399 | """replace nodes by value: | |
1303 | 1400 | |
1304 | 1401 | >>> doc = PyQuery("<html><div /></html>") |
1305 | 1402 | >>> node = PyQuery("<span />") |
1314 | 1411 | value = str(value) |
1315 | 1412 | if hasattr(value, '__call__'): |
1316 | 1413 | for i, element in enumerate(self): |
1317 | self.__class__(element).before( | |
1414 | self._copy(element).before( | |
1318 | 1415 | value(i, element) + (element.tail or '')) |
1319 | 1416 | parent = element.getparent() |
1320 | 1417 | parent.remove(element) |
1321 | 1418 | else: |
1322 | 1419 | for tag in self: |
1323 | self.__class__(tag).before(value + (tag.tail or '')) | |
1420 | self._copy(tag).before(value + (tag.tail or '')) | |
1324 | 1421 | parent = tag.getparent() |
1325 | 1422 | parent.remove(tag) |
1326 | 1423 | return self |
1351 | 1448 | def remove(self, expr=no_default): |
1352 | 1449 | """Remove nodes: |
1353 | 1450 | |
1354 | >>> h = '<div>Maybe <em>she</em> does <strong>NOT</strong> know</div>' | |
1355 | >>> d = PyQuery(h) | |
1356 | >>> d('strong').remove() | |
1357 | [<strong>] | |
1358 | >>> print(d) | |
1359 | <div>Maybe <em>she</em> does know</div> | |
1451 | >>> h = ( | |
1452 | ... '<div>Maybe <em>she</em> does <strong>NOT</strong> know</div>' | |
1453 | ... ) | |
1454 | >>> d = PyQuery(h) | |
1455 | >>> d('strong').remove() | |
1456 | [<strong>] | |
1457 | >>> print(d) | |
1458 | <div>Maybe <em>she</em> does know</div> | |
1360 | 1459 | """ |
1361 | 1460 | if expr is no_default: |
1362 | 1461 | for tag in self: |
1374 | 1473 | prev.tail += ' ' + tag.tail |
1375 | 1474 | parent.remove(tag) |
1376 | 1475 | else: |
1377 | results = self.__class__(expr, self) | |
1476 | results = self._copy(expr, self) | |
1378 | 1477 | results.remove() |
1379 | 1478 | return self |
1380 | 1479 | |
1393 | 1492 | """ |
1394 | 1493 | def __setattr__(self, name, func): |
1395 | 1494 | def fn(self, *args, **kwargs): |
1396 | func_globals(func)['this'] = self | |
1495 | func.__globals__['this'] = self | |
1397 | 1496 | return func(*args, **kwargs) |
1398 | 1497 | fn.__name__ = name |
1399 | 1498 | setattr(PyQuery, name, fn) |
1400 | 1499 | fn = Fn() |
1401 | 1500 | |
1501 | ######## | |
1502 | # AJAX # | |
1503 | ######## | |
1504 | ||
1505 | @with_camel_case_alias | |
1506 | def serialize_array(self): | |
1507 | """Serialize form elements as an array of dictionaries, whose structure | |
1508 | mirrors that produced by the jQuery API. Notably, it does not handle | |
1509 | the deprecated `keygen` form element. | |
1510 | ||
1511 | >>> d = PyQuery('<form><input name="order" value="spam"></form>') | |
1512 | >>> d.serialize_array() == [{'name': 'order', 'value': 'spam'}] | |
1513 | True | |
1514 | >>> d.serializeArray() == [{'name': 'order', 'value': 'spam'}] | |
1515 | True | |
1516 | """ | |
1517 | return list(map( | |
1518 | lambda p: {'name': p[0], 'value': p[1]}, | |
1519 | self.serialize_pairs() | |
1520 | )) | |
1521 | ||
1522 | def serialize(self): | |
1523 | """Serialize form elements as a URL-encoded string. | |
1524 | ||
1525 | >>> h = ( | |
1526 | ... '<form><input name="order" value="spam">' | |
1527 | ... '<input name="order2" value="baked beans"></form>' | |
1528 | ... ) | |
1529 | >>> d = PyQuery(h) | |
1530 | >>> d.serialize() | |
1531 | 'order=spam&order2=baked%20beans' | |
1532 | """ | |
1533 | return urlencode(self.serialize_pairs()).replace('+', '%20') | |
1534 | ||
1402 | 1535 | ##################################################### |
1403 | 1536 | # Additional methods that are not in the jQuery API # |
1404 | 1537 | ##################################################### |
1538 | ||
1539 | @with_camel_case_alias | |
1540 | def serialize_pairs(self): | |
1541 | """Serialize form elements as an array of 2-tuples conventional for | |
1542 | typical URL-parsing operations in Python. | |
1543 | ||
1544 | >>> d = PyQuery('<form><input name="order" value="spam"></form>') | |
1545 | >>> d.serialize_pairs() | |
1546 | [('order', 'spam')] | |
1547 | >>> d.serializePairs() | |
1548 | [('order', 'spam')] | |
1549 | """ | |
1550 | # https://github.com/jquery/jquery/blob | |
1551 | # /2d4f53416e5f74fa98e0c1d66b6f3c285a12f0ce/src/serialize.js#L14 | |
1552 | _submitter_types = ['submit', 'button', 'image', 'reset', 'file'] | |
1553 | ||
1554 | controls = self._copy([]) | |
1555 | # Expand list of form controls | |
1556 | for el in self.items(): | |
1557 | if el[0].tag == 'form': | |
1558 | form_id = el.attr('id') | |
1559 | if form_id: | |
1560 | # Include inputs outside of their form owner | |
1561 | root = self._copy(el.root.getroot()) | |
1562 | controls.extend(root( | |
1563 | '#%s :not([form]):input, [form="%s"]:input' | |
1564 | % (form_id, form_id))) | |
1565 | else: | |
1566 | controls.extend(el(':not([form]):input')) | |
1567 | elif el[0].tag == 'fieldset': | |
1568 | controls.extend(el(':input')) | |
1569 | else: | |
1570 | controls.extend(el) | |
1571 | # Filter controls | |
1572 | selector = '[name]:enabled:not(button)' # Not serializing image button | |
1573 | selector += ''.join(map( | |
1574 | lambda s: ':not([type="%s"])' % s, | |
1575 | _submitter_types)) | |
1576 | controls = controls.filter(selector) | |
1577 | ||
1578 | def _filter_out_unchecked(_, el): | |
1579 | el = controls._copy(el) | |
1580 | return not el.is_(':checkbox:not(:checked)') and \ | |
1581 | not el.is_(':radio:not(:checked)') | |
1582 | controls = controls.filter(_filter_out_unchecked) | |
1583 | ||
1584 | # jQuery serializes inputs with the datalist element as an ancestor | |
1585 | # contrary to WHATWG spec as of August 2018 | |
1586 | # | |
1587 | # xpath = 'self::*[not(ancestor::datalist)]' | |
1588 | # results = [] | |
1589 | # for tag in controls: | |
1590 | # results.extend(tag.xpath(xpath, namespaces=controls.namespaces)) | |
1591 | # controls = controls._copy(results) | |
1592 | ||
1593 | # Serialize values | |
1594 | ret = [] | |
1595 | for field in controls: | |
1596 | val = self._copy(field).val() or '' | |
1597 | if isinstance(val, list): | |
1598 | ret.extend(map( | |
1599 | lambda v: (field.attrib['name'], v.replace('\n', '\r\n')), | |
1600 | val | |
1601 | )) | |
1602 | else: | |
1603 | ret.append((field.attrib['name'], val.replace('\n', '\r\n'))) | |
1604 | return ret | |
1605 | ||
1606 | @with_camel_case_alias | |
1607 | def serialize_dict(self): | |
1608 | """Serialize form elements as an ordered dictionary. Multiple values | |
1609 | corresponding to the same input name are concatenated into one list. | |
1610 | ||
1611 | >>> d = PyQuery('''<form> | |
1612 | ... <input name="order" value="spam"> | |
1613 | ... <input name="order" value="eggs"> | |
1614 | ... <input name="order2" value="ham"> | |
1615 | ... </form>''') | |
1616 | >>> d.serialize_dict() | |
1617 | OrderedDict([('order', ['spam', 'eggs']), ('order2', 'ham')]) | |
1618 | >>> d.serializeDict() | |
1619 | OrderedDict([('order', ['spam', 'eggs']), ('order2', 'ham')]) | |
1620 | """ | |
1621 | ret = OrderedDict() | |
1622 | for name, val in self.serialize_pairs(): | |
1623 | if name not in ret: | |
1624 | ret[name] = val | |
1625 | elif not isinstance(ret[name], list): | |
1626 | ret[name] = [ret[name], val] | |
1627 | else: | |
1628 | ret[name].append(val) | |
1629 | return ret | |
1405 | 1630 | |
1406 | 1631 | @property |
1407 | 1632 | def base_url(self): |
1422 | 1647 | 'You need a base URL to make your links' |
1423 | 1648 | 'absolute. It can be provided by the base_url parameter.')) |
1424 | 1649 | |
1425 | def repl(i, e): | |
1426 | return self(e).attr( | |
1427 | 'href', | |
1428 | urljoin(base_url, self(e).attr('href'))) | |
1429 | ||
1430 | self('a').each(repl) | |
1431 | return self | |
1650 | def repl(attr): | |
1651 | def rep(i, e): | |
1652 | attr_value = self(e).attr(attr) | |
1653 | # when label hasn't such attr, pass | |
1654 | if attr_value is None: | |
1655 | return None | |
1656 | ||
1657 | # skip specific "protocol" schemas | |
1658 | if any(attr_value.startswith(schema) | |
1659 | for schema in ('tel:', 'callto:', 'sms:')): | |
1660 | return None | |
1661 | ||
1662 | return self(e).attr(attr, | |
1663 | urljoin(base_url, attr_value.strip())) | |
1664 | return rep | |
1665 | ||
1666 | self('a').each(repl('href')) | |
1667 | self('link').each(repl('href')) | |
1668 | self('script').each(repl('src')) | |
1669 | self('img').each(repl('src')) | |
1670 | self('iframe').each(repl('src')) | |
1671 | self('form').each(repl('action')) | |
1672 | ||
1673 | return self | |
1674 | ||
1432 | 1675 | |
1433 | 1676 | build_camel_case_aliases(PyQuery) |
0 | # -*- coding: utf-8 -*- | |
1 | try: | |
2 | from deliverance.pyref import PyReference | |
3 | from deliverance import rules | |
4 | from ajax import PyQuery as pq | |
5 | except ImportError: | |
6 | pass | |
7 | else: | |
8 | class PyQuery(rules.AbstractAction): | |
9 | """Python function""" | |
10 | name = 'py' | |
11 | def __init__(self, source_location, pyref): | |
12 | self.source_location = source_location | |
13 | self.pyref = pyref | |
14 | ||
15 | def apply(self, content_doc, theme_doc, resource_fetcher, log): | |
16 | self.pyref(pq([content_doc]), pq([theme_doc]), resource_fetcher, log) | |
17 | ||
18 | @classmethod | |
19 | def from_xml(cls, el, source_location): | |
20 | """Parses and instantiates the class from an element""" | |
21 | pyref = PyReference.parse_xml( | |
22 | el, source_location=source_location, | |
23 | default_function='transform') | |
24 | return cls(source_location, pyref) | |
25 | ||
26 | rules._actions['pyquery'] = PyQuery | |
27 | ||
28 | def deliverance_proxy(): | |
29 | import deliverance.proxycommand | |
30 | deliverance.proxycommand.main() |
0 | import re | |
1 | ||
2 | ||
3 | # https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements#Elements | |
4 | INLINE_TAGS = { | |
5 | 'a', 'abbr', 'acronym', 'b', 'bdo', 'big', 'br', 'button', 'cite', | |
6 | 'code', 'dfn', 'em', 'i', 'img', 'input', 'kbd', 'label', 'map', | |
7 | 'object', 'q', 'samp', 'script', 'select', 'small', 'span', 'strong', | |
8 | 'sub', 'sup', 'textarea', 'time', 'tt', 'var' | |
9 | } | |
10 | ||
11 | SEPARATORS = {'br'} | |
12 | ||
13 | ||
14 | # Definition of whitespace in HTML: | |
15 | # https://www.w3.org/TR/html4/struct/text.html#h-9.1 | |
16 | WHITESPACE_RE = re.compile(u'[\x20\x09\x0C\u200B\x0A\x0D]+') | |
17 | ||
18 | ||
19 | def squash_html_whitespace(text): | |
20 | # use raw extract_text for preformatted content (like <pre> content or set | |
21 | # by CSS rules) | |
22 | # apply this function on top of | |
23 | return WHITESPACE_RE.sub(' ', text) | |
24 | ||
25 | ||
26 | def _squash_artifical_nl(parts): | |
27 | output, last_nl = [], False | |
28 | for x in parts: | |
29 | if x is not None: | |
30 | output.append(x) | |
31 | last_nl = False | |
32 | elif not last_nl: | |
33 | output.append(None) | |
34 | last_nl = True | |
35 | return output | |
36 | ||
37 | ||
38 | def _strip_artifical_nl(parts): | |
39 | if not parts: | |
40 | return parts | |
41 | for start_idx, pt in enumerate(parts): | |
42 | if isinstance(pt, str): | |
43 | # 0, 1, 2, index of first string [start_idx:... | |
44 | break | |
45 | iterator = enumerate(parts[:start_idx - 1 if start_idx > 0 else None:-1]) | |
46 | for end_idx, pt in iterator: | |
47 | if isinstance(pt, str): # 0=None, 1=-1, 2=-2, index of last string | |
48 | break | |
49 | return parts[start_idx:-end_idx if end_idx > 0 else None] | |
50 | ||
51 | ||
52 | def _merge_original_parts(parts): | |
53 | output, orp_buf = [], [] | |
54 | ||
55 | def flush(): | |
56 | if orp_buf: | |
57 | item = squash_html_whitespace(''.join(orp_buf)).strip() | |
58 | if item: | |
59 | output.append(item) | |
60 | orp_buf[:] = [] | |
61 | ||
62 | for x in parts: | |
63 | if not isinstance(x, str): | |
64 | flush() | |
65 | output.append(x) | |
66 | else: | |
67 | orp_buf.append(x) | |
68 | flush() | |
69 | return output | |
70 | ||
71 | ||
72 | def extract_text_array(dom, squash_artifical_nl=True, strip_artifical_nl=True): | |
73 | if callable(dom.tag): | |
74 | return '' | |
75 | r = [] | |
76 | if dom.tag in SEPARATORS: | |
77 | r.append(True) # equivalent of '\n' used to designate separators | |
78 | elif dom.tag not in INLINE_TAGS: | |
79 | # equivalent of '\n' used to designate artifically inserted newlines | |
80 | r.append(None) | |
81 | if dom.text is not None: | |
82 | r.append(dom.text) | |
83 | for child in dom.getchildren(): | |
84 | r.extend(extract_text_array(child, squash_artifical_nl=False, | |
85 | strip_artifical_nl=False)) | |
86 | if child.tail is not None: | |
87 | r.append(child.tail) | |
88 | if dom.tag not in INLINE_TAGS and dom.tag not in SEPARATORS: | |
89 | # equivalent of '\n' used to designate artifically inserted newlines | |
90 | r.append(None) | |
91 | if squash_artifical_nl: | |
92 | r = _squash_artifical_nl(r) | |
93 | if strip_artifical_nl: | |
94 | r = _strip_artifical_nl(r) | |
95 | return r | |
96 | ||
97 | ||
98 | def extract_text(dom, block_symbol='\n', sep_symbol='\n', squash_space=True): | |
99 | a = extract_text_array(dom, squash_artifical_nl=squash_space) | |
100 | if squash_space: | |
101 | a = _strip_artifical_nl(_squash_artifical_nl(_merge_original_parts(a))) | |
102 | result = ''.join( | |
103 | block_symbol if x is None else ( | |
104 | sep_symbol if x is True else x | |
105 | ) | |
106 | for x in a | |
107 | ) | |
108 | if squash_space: | |
109 | result = result.strip() | |
110 | return result |
0 | Metadata-Version: 1.1 | |
0 | Metadata-Version: 2.1 | |
1 | 1 | Name: pyquery |
2 | Version: 1.2.9 | |
2 | Version: 1.4.4.dev0 | |
3 | 3 | Summary: A jquery-like library for python |
4 | 4 | Home-page: https://github.com/gawel/pyquery |
5 | Author: Gael Pasgrimaud | |
6 | Author-email: gael@gawel.org | |
5 | Author: Olivier Lauzanne | |
6 | Author-email: olauzanne@gmail.com | |
7 | Maintainer: Gael Pasgrimaud | |
8 | Maintainer-email: gael@gawel.org | |
7 | 9 | License: BSD |
8 | 10 | Description: |
9 | 11 | pyquery: a jquery-like library for python |
10 | 12 | ========================================= |
11 | 13 | |
14 | .. image:: https://travis-ci.org/gawel/pyquery.svg | |
15 | :alt: Build Status | |
16 | :target: https://travis-ci.org/gawel/pyquery | |
17 | ||
12 | 18 | pyquery allows you to make jquery queries on xml documents. |
13 | 19 | The API is as much as possible the similar to jquery. pyquery uses lxml for fast |
14 | 20 | xml and html manipulation. |
19 | 25 | |
20 | 26 | The `project`_ is being actively developped on a git repository on Github. I |
21 | 27 | have the policy of giving push access to anyone who wants it and then to review |
22 | what he does. So if you want to contribute just email me. | |
28 | what they do. So if you want to contribute just email me. | |
23 | 29 | |
24 | 30 | Please report bugs on the `github |
25 | 31 | <https://github.com/gawel/pyquery/issues>`_ issue |
27 | 33 | |
28 | 34 | .. _deliverance: http://www.gawel.org/weblog/en/2008/12/skinning-with-pyquery-and-deliverance |
29 | 35 | .. _project: https://github.com/gawel/pyquery/ |
36 | ||
37 | I've spent hours maintaining this software, with love. | |
38 | Please consider tiping if you like it: | |
39 | ||
40 | BTC: 1PruQAwByDndFZ7vTeJhyWefAghaZx9RZg | |
41 | ||
42 | ETH: 0xb6418036d8E06c60C4D91c17d72Df6e1e5b15CE6 | |
43 | ||
44 | LTC: LY6CdZcDbxnBX9GFBJ45TqVj8NykBBqsmT | |
45 | ||
46 | .. | |
47 | >>> (urlopen, your_url, path_to_html_file) = getfixture('readme_fixt') | |
30 | 48 | |
31 | 49 | Quickstart |
32 | 50 | ========== |
72 | 90 | News |
73 | 91 | ==== |
74 | 92 | |
93 | 1.4.4 (unreleased) | |
94 | ------------------ | |
95 | ||
96 | - Add nextUntil method | |
97 | ||
98 | ||
99 | 1.4.3 (2020-11-21) | |
100 | ------------------ | |
101 | ||
102 | - No longer use a universal wheel | |
103 | ||
104 | ||
105 | 1.4.2 (2020-11-21) | |
106 | ------------------ | |
107 | ||
108 | - Fix exception raised when calling `PyQuery("<textarea></textarea>").text()` | |
109 | ||
110 | - python2 is no longer supported | |
111 | ||
112 | 1.4.1 (2019-10-26) | |
113 | ------------------ | |
114 | ||
115 | - This is the latest release with py2 support | |
116 | ||
117 | - Remove py33, py34 support | |
118 | ||
119 | - web scraping improvements: default timeout and session support | |
120 | ||
121 | - Add API methods to serialize form-related elements according to spec | |
122 | ||
123 | - Include HTML markup when querying textarea text/value | |
124 | ||
125 | ||
126 | 1.4.0 (2018-01-11) | |
127 | ------------------ | |
128 | ||
129 | - Refactoring of `.text()` to match firefox behavior. | |
130 | ||
131 | ||
132 | 1.3.0 (2017-10-21) | |
133 | ------------------ | |
134 | ||
135 | - Remove some unmaintained modules: ``pyquery.ajax`` and ``pyquery.rules`` | |
136 | ||
137 | - Code cleanup. No longer use ugly hacks required by python2.6/python3.2. | |
138 | ||
139 | - Run tests with python3.6 on CI | |
140 | ||
141 | - Add a ``method`` argument to ``.outer_html()`` | |
142 | ||
143 | ||
144 | 1.2.17 (2016-10-14) | |
145 | ------------------- | |
146 | ||
147 | - ``PyQuery('<input value="">').val()`` is ``''`` | |
148 | - ``PyQuery('<input>').val()`` is ``''`` | |
149 | ||
150 | ||
151 | 1.2.16 (2016-10-14) | |
152 | ------------------- | |
153 | ||
154 | - ``.attr('value', '')`` no longer removes the ``value`` attribute | |
155 | ||
156 | - ``<input type="checkbox">`` without ``value="..."`` have a ``.val()`` of | |
157 | ``'on'`` | |
158 | ||
159 | - ``<input type="radio">`` without ``value="..."`` have a ``.val()`` of | |
160 | ``'on'`` | |
161 | ||
162 | - ``<select>`` without ``<option selected>`` have the value of their first | |
163 | ``<option>`` (or ``None`` if there are no options) | |
164 | ||
165 | ||
166 | 1.2.15 (2016-10-11) | |
167 | ------------------- | |
168 | ||
169 | - .val() should never raise | |
170 | ||
171 | - drop py26 support | |
172 | ||
173 | - improve .extend() by returning self | |
174 | ||
175 | ||
176 | 1.2.14 (2016-10-10) | |
177 | ------------------- | |
178 | ||
179 | - fix val() for <textarea> and <select>, to match jQuery behavior | |
180 | ||
181 | ||
182 | 1.2.13 (2016-04-12) | |
183 | ------------------- | |
184 | ||
185 | - Note explicit support for Python 3.5 | |
186 | ||
187 | 1.2.12 (2016-04-12) | |
188 | ------------------- | |
189 | ||
190 | - make_links_absolute now take care of whitespaces | |
191 | ||
192 | - added pseudo selector :has() | |
193 | ||
194 | - add cookies arguments as allowed arguments for requests | |
195 | ||
196 | ||
197 | 1.2.11 (2016-02-02) | |
198 | ------------------- | |
199 | ||
200 | - Preserve namespaces attribute on PyQuery copies. | |
201 | ||
202 | - Do not raise an error when the http response code is 2XX | |
203 | ||
204 | 1.2.10 (2016-01-05) | |
205 | ------------------- | |
206 | ||
207 | - Fixed #118: implemented usage ``lxml.etree.tostring`` within ``outer_html`` method | |
208 | ||
209 | - Fixed #117: Raise HTTP Error if HTTP status code is not equal to 200 | |
210 | ||
211 | - Fixed #112: make_links_absolute does not apply to form actions | |
212 | ||
213 | - Fixed #98: contains act like jQuery | |
214 | ||
215 | ||
75 | 216 | 1.2.9 (2014-08-22) |
76 | 217 | ------------------ |
77 | 218 | |
114 | 255 | 1.2.6 (2013-10-11) |
115 | 256 | ------------------ |
116 | 257 | |
117 | README_fixt.py was not include in the release. Fix #54. | |
258 | - README_fixt.py was not include in the release. Fix #54. | |
118 | 259 | |
119 | 260 | |
120 | 261 | 1.2.5 (2013-10-10) |
121 | 262 | ------------------ |
122 | 263 | |
123 | cssselect compat. See https://github.com/SimonSapin/cssselect/pull/22 | |
124 | ||
125 | tests improvments. no longer require a eth connection. | |
126 | ||
127 | fix #55 | |
264 | - cssselect compat. See https://github.com/SimonSapin/cssselect/pull/22 | |
265 | ||
266 | - tests improvments. no longer require a eth connection. | |
267 | ||
268 | - fix #55 | |
128 | 269 | |
129 | 270 | 1.2.4 |
130 | 271 | ----- |
131 | 272 | |
132 | Moved to github. So a few files are renamed from .txt to .rst | |
133 | ||
134 | Added .xhtml_to_html() and .remove_namespaces() | |
135 | ||
136 | Use requests to fetch urls (if available) | |
137 | ||
138 | Use restkit's proxy instead of Paste (which will die with py3) | |
139 | ||
140 | Allow to open https urls | |
141 | ||
142 | python2.5 is no longer supported (may work, but tests are broken) | |
273 | - Moved to github. So a few files are renamed from .txt to .rst | |
274 | ||
275 | - Added .xhtml_to_html() and .remove_namespaces() | |
276 | ||
277 | - Use requests to fetch urls (if available) | |
278 | ||
279 | - Use restkit's proxy instead of Paste (which will die with py3) | |
280 | ||
281 | - Allow to open https urls | |
282 | ||
283 | - python2.5 is no longer supported (may work, but tests are broken) | |
143 | 284 | |
144 | 285 | 1.2.3 |
145 | 286 | ----- |
146 | 287 | |
147 | Allow to pass this in .filter() callback | |
148 | ||
149 | Add .contents() .items() | |
150 | ||
151 | Add tox.ini | |
152 | ||
153 | Bug fixes: fix #35 #55 #64 #66 | |
288 | - Allow to pass this in .filter() callback | |
289 | ||
290 | - Add .contents() .items() | |
291 | ||
292 | - Add tox.ini | |
293 | ||
294 | - Bug fixes: fix #35 #55 #64 #66 | |
154 | 295 | |
155 | 296 | 1.2.2 |
156 | 297 | ----- |
157 | 298 | |
158 | Fix cssselectpatch to match the newer implementation of cssselect. Fixes issue #62, #52 and #59 (Haoyu Bai) | |
159 | ||
160 | Fix issue #37 (Caleb Burns) | |
299 | - Fix cssselectpatch to match the newer implementation of cssselect. Fixes issue #62, #52 and #59 (Haoyu Bai) | |
300 | ||
301 | - Fix issue #37 (Caleb Burns) | |
161 | 302 | |
162 | 303 | 1.2.1 |
163 | 304 | ----- |
164 | 305 | |
165 | Allow to use a custom css translator. | |
166 | ||
167 | Fix issue 44: case problem with xml documents | |
306 | - Allow to use a custom css translator. | |
307 | ||
308 | - Fix issue 44: case problem with xml documents | |
168 | 309 | |
169 | 310 | 1.2 |
170 | 311 | --- |
171 | 312 | |
172 | PyQuery now use `cssselect <http://pypi.python.org/pypi/cssselect>`_. See issue | |
173 | 43. | |
174 | ||
175 | Fix issue 40: forward .html() extra arguments to ``lxml.etree.tostring`` | |
313 | - PyQuery now uses `cssselect <http://pypi.python.org/pypi/cssselect>`_. See issue 43. | |
314 | ||
315 | - Fix issue 40: forward .html() extra arguments to ``lxml.etree.tostring`` | |
176 | 316 | |
177 | 317 | 1.1.1 |
178 | 318 | ----- |
179 | 319 | |
180 | Minor release. Include test file so you can run tests from the tarball. | |
320 | - Minor release. Include test file so you can run tests from the tarball. | |
181 | 321 | |
182 | 322 | |
183 | 323 | 1.1 |
184 | 324 | --- |
185 | 325 | |
186 | fix issues 30, 31, 32 - py3 improvements / webob 1.2+ support | |
326 | - fix issues 30, 31, 32 - py3 improvements / webob 1.2+ support | |
187 | 327 | |
188 | 328 | |
189 | 329 | 1.0 |
190 | 330 | --- |
191 | 331 | |
192 | fix issues 24 | |
332 | - fix issues 24 | |
193 | 333 | |
194 | 334 | 0.7 |
195 | 335 | --- |
196 | 336 | |
197 | Python 3 compatible | |
198 | ||
199 | Add __unicode__ method | |
200 | ||
201 | Add root and encoding attribute | |
202 | ||
203 | fix issues 19, 20, 22, 23 | |
337 | - Python 3 compatible | |
338 | ||
339 | - Add __unicode__ method | |
340 | ||
341 | - Add root and encoding attribute | |
342 | ||
343 | - fix issues 19, 20, 22, 23 | |
204 | 344 | |
205 | 345 | 0.6.1 |
206 | 346 | ------ |
207 | 347 | |
208 | Move README.txt at package root | |
209 | ||
210 | Add CHANGES.txt and add it to long_description | |
348 | - Move README.txt at package root | |
349 | ||
350 | - Add CHANGES.txt and add it to long_description | |
211 | 351 | |
212 | 352 | 0.6 |
213 | 353 | ---- |
214 | 354 | |
215 | Added PyQuery.outerHtml | |
216 | ||
217 | Added PyQuery.fn | |
218 | ||
219 | Added PyQuery.map | |
220 | ||
221 | Change PyQuery.each behavior to reflect jQuery api | |
355 | - Added PyQuery.outerHtml | |
356 | ||
357 | - Added PyQuery.fn | |
358 | ||
359 | - Added PyQuery.map | |
360 | ||
361 | - Change PyQuery.each behavior to reflect jQuery api | |
222 | 362 | |
223 | 363 | |
224 | 364 | |
228 | 368 | Platform: UNKNOWN |
229 | 369 | Classifier: Intended Audience :: Developers |
230 | 370 | Classifier: Development Status :: 5 - Production/Stable |
231 | Classifier: Programming Language :: Python :: 2 | |
232 | Classifier: Programming Language :: Python :: 2.6 | |
233 | Classifier: Programming Language :: Python :: 2.7 | |
234 | 371 | Classifier: Programming Language :: Python :: 3 |
235 | Classifier: Programming Language :: Python :: 3.3 | |
236 | Classifier: Programming Language :: Python :: 3.4 | |
372 | Classifier: Programming Language :: Python :: 3.5 | |
373 | Classifier: Programming Language :: Python :: 3.6 | |
374 | Classifier: Programming Language :: Python :: 3.7 | |
375 | Provides-Extra: test |
0 | 0 | CHANGES.rst |
1 | LICENSE.txt | |
1 | 2 | MANIFEST.in |
2 | 3 | README.rst |
3 | 4 | README_fixt.py |
4 | buildout.cfg | |
5 | conftest.py | |
6 | pytest.ini | |
5 | 7 | setup.cfg |
6 | 8 | setup.py |
7 | 9 | tox.ini |
8 | 10 | docs/Makefile |
9 | docs/ajax.rst | |
10 | docs/ajax_fixt.py | |
11 | 11 | docs/api.rst |
12 | 12 | docs/attributes.rst |
13 | 13 | docs/changes.rst |
14 | 14 | docs/conf.py |
15 | docs/conftest.py | |
15 | 16 | docs/css.rst |
16 | 17 | docs/future.rst |
17 | 18 | docs/index.rst |
18 | 19 | docs/manipulating.rst |
19 | 20 | docs/pseudo_classes.rst |
20 | 21 | docs/scrap.rst |
21 | docs/scrap_fixt.py | |
22 | 22 | docs/testing.rst |
23 | 23 | docs/tips.rst |
24 | docs/tips_fixt.py | |
25 | 24 | docs/traversing.rst |
26 | 25 | pyquery/__init__.py |
27 | pyquery/ajax.py | |
28 | 26 | pyquery/cssselectpatch.py |
29 | 27 | pyquery/openers.py |
30 | 28 | pyquery/pyquery.py |
31 | pyquery/rules.py | |
29 | pyquery/text.py | |
32 | 30 | pyquery.egg-info/PKG-INFO |
33 | 31 | pyquery.egg-info/SOURCES.txt |
34 | 32 | pyquery.egg-info/dependency_links.txt |
38 | 36 | pyquery.egg-info/top_level.txt |
39 | 37 | tests/__init__.py |
40 | 38 | tests/apps.py |
41 | tests/compat.py | |
39 | tests/browser_base.py | |
42 | 40 | tests/doctests.rst |
41 | tests/geckodriver.sh | |
43 | 42 | tests/invalid.xml |
43 | tests/selenium.sh | |
44 | 44 | tests/test.html |
45 | tests/test_pyquery.py⏎ | |
45 | tests/test_browser.py | |
46 | tests/test_pyquery.py | |
47 | tests/test_real_browser.py⏎ |
0 | cssselect>0.7.9 | |
0 | 1 | lxml>=2.1 |
1 | cssselect | |
2 | ||
3 | [test] | |
4 | pytest | |
5 | pytest-cov | |
6 | requests | |
7 | webob | |
8 | webtest |
0 | ||
1 | [pytest] | |
2 | filterwarnings = | |
3 | ignore::DeprecationWarning | |
4 | doctest_optionflags = ELLIPSIS NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL | |
5 | addopts = --doctest-modules --doctest-glob="*.rst" --ignore=docs/conf.py |
4 | 4 | doctest-extension = rst |
5 | 5 | doctest-fixtures = _fixt |
6 | 6 | include = docs |
7 | exclude = seleniumtests | |
7 | 8 | cover-package = pyquery |
8 | 9 | with-coverage = 1 |
9 | 10 | doctest-options = +ELLIPSIS,+NORMALIZE_WHITESPACE |
11 | 12 | [egg_info] |
12 | 13 | tag_build = |
13 | 14 | tag_date = 0 |
14 | tag_svn_revision = 0 | |
15 | 15 |
0 | #-*- coding:utf-8 -*- | |
0 | # -*- coding:utf-8 -*- | |
1 | 1 | # |
2 | 2 | # Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com> |
3 | 3 | # |
5 | 5 | |
6 | 6 | from setuptools import setup, find_packages |
7 | 7 | import os |
8 | ||
9 | ||
10 | install_requires = [ | |
11 | 'lxml>=2.1', | |
12 | 'cssselect>0.7.9', | |
13 | ] | |
8 | 14 | |
9 | 15 | |
10 | 16 | def read(*names): |
33 | 39 | |
34 | 40 | """ % read('README', 'CHANGES') |
35 | 41 | |
36 | version = '1.2.9' | |
42 | version = '1.4.4.dev0' | |
37 | 43 | |
38 | 44 | setup(name='pyquery', |
39 | 45 | version=version, |
42 | 48 | classifiers=[ |
43 | 49 | "Intended Audience :: Developers", |
44 | 50 | "Development Status :: 5 - Production/Stable", |
45 | "Programming Language :: Python :: 2", | |
46 | "Programming Language :: Python :: 2.6", | |
47 | "Programming Language :: Python :: 2.7", | |
48 | 51 | "Programming Language :: Python :: 3", |
49 | "Programming Language :: Python :: 3.3", | |
50 | "Programming Language :: Python :: 3.4", | |
52 | "Programming Language :: Python :: 3.5", | |
53 | "Programming Language :: Python :: 3.6", | |
54 | "Programming Language :: Python :: 3.7", | |
51 | 55 | ], |
52 | 56 | keywords='jquery html xml scraping', |
53 | 57 | author='Olivier Lauzanne', |
59 | 63 | packages=find_packages(exclude=[ |
60 | 64 | 'bootstrap', 'bootstrap-py3k', 'docs', 'tests', 'README_fixt' |
61 | 65 | ]), |
66 | extras_require={ | |
67 | 'test': ['requests', 'webob', 'webtest', 'pytest', 'pytest-cov'], | |
68 | }, | |
62 | 69 | include_package_data=True, |
63 | 70 | zip_safe=False, |
64 | install_requires=[ | |
65 | 'lxml>=2.1', | |
66 | 'cssselect', | |
67 | ], | |
71 | install_requires=install_requires, | |
68 | 72 | entry_points=""" |
69 | 73 | # -*- Entry points: -*- |
70 | 74 | """, |
1 | 1 | from webob import Request |
2 | 2 | from webob import Response |
3 | 3 | from webob import exc |
4 | from .compat import b | |
5 | 4 | |
6 | 5 | |
7 | 6 | def input_app(environ, start_response): |
8 | 7 | resp = Response() |
9 | 8 | req = Request(environ) |
10 | 9 | if req.path_info == '/': |
11 | resp.body = b('<input name="youyou" type="text" value="" />') | |
10 | resp.text = '<input name="youyou" type="text" value="" />' | |
12 | 11 | elif req.path_info == '/submit': |
13 | resp.body = b('<input type="submit" value="OK" />') | |
12 | resp.text = '<input type="submit" value="OK" />' | |
14 | 13 | elif req.path_info.startswith('/html'): |
15 | resp.body = b('<html><p>Success</p></html>') | |
14 | resp.text = '<html><p>Success</p></html>' | |
16 | 15 | else: |
17 | resp.body = '' | |
16 | resp.text = '<html></html>' | |
18 | 17 | return resp(environ, start_response) |
19 | 18 | |
20 | 19 | |
22 | 21 | req = Request(environ) |
23 | 22 | response = Response() |
24 | 23 | if req.method == 'GET': |
25 | response.body = b('<pre>Yeah !</pre>') | |
24 | response.text = '<pre>Yeah !</pre>' | |
26 | 25 | else: |
27 | response.body = b('<a href="/plop">Yeah !</a>') | |
26 | response.text = '<a href="/plop">Yeah !</a>' | |
28 | 27 | return response(environ, start_response) |
29 | 28 | |
30 | 29 |
0 | ||
class TextExtractionMixin():
    """Shared text-extraction test cases.

    Each ``test_*`` method feeds an HTML fragment together with the text
    expected when whitespace squashing is on (``expected_sq``) and off
    (``expected_nosq``) into ``_simple_test``, which concrete subclasses
    implement against their own extraction backend (PyQuery or a real
    browser).
    """

    def _prepare_dom(self, html):
        # Wrap the fragment in a minimal document shell.
        self.last_html = '<html><body>' + html + '</body></html>'

    def _simple_test(self, html, expected_sq, expected_nosq, **kwargs):
        # Concrete test classes perform the actual extraction/comparison.
        raise NotImplementedError

    def _check_cases(self, cases):
        # Run a batch of (fragment, squashed, unsquashed) expectations.
        for fragment, squashed, unsquashed in cases:
            self._simple_test(fragment, squashed, unsquashed)

    def test_inline_tags(self):
        self._check_cases([
            ('Phas<em>ell</em>us<i> eget </i>sem <b>facilisis</b> justo',
             'Phasellus eget sem facilisis justo',
             'Phasellus eget sem facilisis justo'),
            ('Phasellus <span> eget </span> sem <b>facilisis\n</b> justo',
             'Phasellus eget sem facilisis justo',
             'Phasellus eget sem facilisis\n justo'),
            ('Phasellus <span>\n eget\n sem\n\tfacilisis</span> justo',
             'Phasellus eget sem facilisis justo',
             'Phasellus \n eget\n sem\n\tfacilisis justo'),
        ])

    def test_block_tags(self):
        self._check_cases([
            ('Phas<p>ell</p>us<div> eget </div>sem <h1>facilisis</h1> justo',
             'Phas\nell\nus\neget\nsem\nfacilisis\njusto',
             'Phas\nell\nus\n eget \nsem \nfacilisis\n justo'),
            ('<p>In sagittis</p> <p>rutrum</p><p>condimentum</p>',
             'In sagittis\nrutrum\ncondimentum',
             'In sagittis\n \nrutrum\n\ncondimentum'),
            ('In <p>\nultricies</p>\n erat et <p>\n\n\nmaximus\n\n</p> mollis',
             'In\nultricies\nerat et\nmaximus\nmollis',
             'In \n\nultricies\n\n erat et \n\n\n\nmaximus\n\n\n mollis'),
            ('Integer <div><div>\n <div>quis commodo</div></div> '
             '</div> libero',
             'Integer\nquis commodo\nlibero',
             'Integer \n\n\n \nquis commodo\n\n \n libero'),
            ('Heading<ul><li>one</li><li>two</li><li>three</li></ul>',
             'Heading\none\ntwo\nthree',
             'Heading\n\none\n\ntwo\n\nthree'),
        ])

    def test_separators(self):
        self._check_cases([
            ('Some words<br>test. Another word<br><br> <br> test.',
             'Some words\ntest. Another word\n\n\ntest.',
             'Some words\ntest. Another word\n\n \n test.'),
            ('Inline <span> splitted by\nbr<br>tag</span> test',
             'Inline splitted by br\ntag test',
             'Inline splitted by\nbr\ntag test'),
            ('Some words<hr>test. Another word<hr><hr> <hr> test.',
             'Some words\ntest. Another word\ntest.',
             'Some words\n\ntest. Another word\n\n\n\n \n\n test.'),
        ])

    def test_strip(self):
        self._check_cases([
            (' text\n', 'text', ' text\n'),
        ])

    def test_ul_li(self):
        self._check_cases([
            ('<ul> <li> </li> </ul>', '', ' \n \n '),
        ])
# -*- coding: utf-8 -*-
"""Small python2/python3 compatibility shims used by the test suite."""
import sys

# Major-version flag: True when running under Python 3.
PY3k = sys.version_info[0] >= 3

if PY3k:
    text_type = str

    def u(value, encoding):
        """Return *value* coerced to text (encoding is irrelevant on py3)."""
        return str(value)

    def b(value):
        """Return *value* encoded to UTF-8 bytes."""
        return value.encode('utf-8')
else:
    text_type = unicode  # NOQA

    def u(value, encoding):  # NOQA
        """Decode the byte string *value* using *encoding*."""
        return unicode(value, encoding)  # NOQA

    def b(value):  # NOQA
        """Return *value* as a native byte string."""
        return str(value)

# Prefer the unittest2 backport when installed; fall back to the
# standard library otherwise.
try:
    from unittest2 import TestCase
except ImportError:
    from unittest import TestCase  # NOQA
#!/bin/bash
# Fetch and unpack the pinned geckodriver build, skipping the download
# when a ./geckodriver binary is already present.

driver_url="https://github.com/mozilla/geckodriver/releases/download/v0.26.0/geckodriver-v0.26.0-linux64.tar.gz"

if [ ! -f geckodriver ]; then
    # -c resume, -q quiet, -O- stream the archive to stdout for tar.
    wget -cqO- "$driver_url" | tar xvzf -
fi
#!/bin/bash
# script to run selenium tests

# Make sure the pinned geckodriver binary is available locally.
./tests/geckodriver.sh

# Run the real-browser suite under the py37 tox environment with a
# headless Firefox; expose the downloaded driver on PATH.
env MOZ_HEADLESS=1 PATH="$PATH:$PWD" tox -e py37 tests/test_real_browser.py
0 | import unittest | |
1 | ||
2 | from pyquery.pyquery import PyQuery | |
3 | from .browser_base import TextExtractionMixin | |
4 | ||
5 | ||
class TestInnerText(unittest.TestCase, TextExtractionMixin):
    """Run the shared text-extraction cases through ``PyQuery.text()``."""

    def _prepare_dom(self, html):
        super(TestInnerText, self)._prepare_dom(html)
        # Parse the wrapped document once per case.
        self.pq = PyQuery(self.last_html)

    def _simple_test(self, html, expected_sq, expected_nosq, **kwargs):
        self._prepare_dom(html)
        squashed = self.pq.text(squash_space=True, **kwargs)
        unsquashed = self.pq.text(squash_space=False, **kwargs)
        self.assertEqual(squashed, expected_sq)
        self.assertEqual(unsquashed, expected_nosq)
0 | #-*- coding:utf-8 -*- | |
1 | # | |
2 | 0 | # Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com> |
3 | 1 | # |
4 | 2 | # Distributed under the BSD license, see LICENSE.txt |
5 | 3 | import os |
6 | 4 | import sys |
7 | sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) | |
5 | import time | |
8 | 6 | from lxml import etree |
9 | from pyquery.pyquery import PyQuery as pq | |
10 | from pyquery.ajax import PyQuery as pqa | |
7 | from pyquery.pyquery import PyQuery as pq, no_default | |
8 | from pyquery.openers import HAS_REQUEST | |
11 | 9 | from webtest import http |
12 | 10 | from webtest.debugapp import debug_app |
13 | from .apps import application | |
14 | from .apps import secure_application | |
15 | from .compat import PY3k | |
16 | from .compat import u | |
17 | from .compat import b | |
18 | from .compat import text_type | |
19 | from .compat import TestCase | |
20 | ||
21 | ||
22 | def not_py3k(func): | |
23 | if not PY3k: | |
24 | return func | |
25 | ||
26 | try: | |
27 | import requests # NOQA | |
28 | HAS_REQUEST = True | |
29 | except ImportError: | |
30 | HAS_REQUEST = False | |
11 | from unittest import TestCase | |
12 | ||
13 | sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) | |
31 | 14 | |
32 | 15 | |
33 | 16 | dirname = os.path.dirname(os.path.abspath(__file__)) |
39 | 22 | class TestUnicode(TestCase): |
40 | 23 | |
41 | 24 | def test_unicode(self): |
42 | xml = pq(u("<html><p>é</p></html>", 'utf-8')) | |
43 | self.assertEqual(type(xml.html()), text_type) | |
44 | if PY3k: | |
45 | self.assertEqual(str(xml), '<html><p>é</p></html>') | |
46 | self.assertEqual(str(xml('p:contains("é")')), '<p>é</p>') | |
47 | else: | |
48 | self.assertEqual(unicode(xml), u("<html><p>é</p></html>", 'utf-8')) | |
49 | self.assertEqual(str(xml), '<html><p>é</p></html>') | |
50 | self.assertEqual(str(xml(u('p:contains("é")', 'utf8'))), | |
51 | '<p>é</p>') | |
52 | self.assertEqual(unicode(xml(u('p:contains("é")', 'utf8'))), | |
53 | u('<p>é</p>', 'utf8')) | |
25 | xml = pq(u"<html><p>é</p></html>") | |
26 | self.assertEqual(type(xml.html()), str) | |
27 | self.assertEqual(str(xml), '<html><p>é</p></html>') | |
28 | self.assertEqual(str(xml('p:contains("é")')), '<p>é</p>') | |
54 | 29 | |
55 | 30 | |
56 | 31 | class TestAttributeCase(TestCase): |
101 | 76 | <body> |
102 | 77 | <form action="/"> |
103 | 78 | <input name="enabled" type="text" value="test"/> |
79 | <b disabled>Not :disabled</b> | |
104 | 80 | <input name="disabled" type="text" |
105 | 81 | value="disabled" disabled="disabled"/> |
82 | <fieldset> | |
83 | <input name="fieldset-enabled"> | |
84 | </fieldset> | |
85 | <fieldset disabled> | |
86 | <legend> | |
87 | <input name="legend-enabled"> | |
88 | </legend> | |
89 | <input name="fieldset-disabled"> | |
90 | <legend> | |
91 | <input name="legend-disabled"> | |
92 | </legend> | |
93 | <select id="disabled-select"> | |
94 | <optgroup> | |
95 | <option></option> | |
96 | </optgroup> | |
97 | </select> | |
98 | </fieldset> | |
99 | <select> | |
100 | <optgroup id="disabled-optgroup" disabled> | |
101 | <option id="disabled-from-optgroup"></option> | |
102 | <option id="disabled-option" disabled></option> | |
103 | </optgroup> | |
104 | </select> | |
106 | 105 | <input name="file" type="file" /> |
107 | 106 | <select name="select"> |
108 | 107 | <option value="">Choose something</option> |
134 | 133 | <h4>Heading 4</h4> |
135 | 134 | <h5>Heading 5</h5> |
136 | 135 | <h6>Heading 6</h6> |
136 | <div></div> | |
137 | 137 | </body> |
138 | 138 | </html> |
139 | 139 | """ |
140 | 140 | |
141 | 141 | def test_get_root(self): |
142 | doc = pq(b('<?xml version="1.0" encoding="UTF-8"?><root><p/></root>')) | |
142 | doc = pq(b'<?xml version="1.0" encoding="UTF-8"?><root><p/></root>') | |
143 | 143 | self.assertEqual(isinstance(doc.root, etree._ElementTree), True) |
144 | 144 | self.assertEqual(doc.encoding, 'UTF-8') |
145 | ||
146 | child = doc.children().eq(0) | |
147 | self.assertNotEqual(child._parent, no_default) | |
148 | self.assertTrue(isinstance(child.root, etree._ElementTree)) | |
145 | 149 | |
146 | 150 | def test_selector_from_doc(self): |
147 | 151 | doc = etree.fromstring(self.html) |
182 | 186 | self.assertEqual(e('div:lt(1)').text(), 'node1') |
183 | 187 | self.assertEqual(e('div:eq(2)').text(), 'node3') |
184 | 188 | |
185 | #test on the form | |
189 | # test on the form | |
186 | 190 | e = self.klass(self.html4) |
187 | assert len(e(':disabled')) == 1 | |
188 | assert len(e('input:enabled')) == 9 | |
191 | disabled = e(':disabled') | |
192 | self.assertIn(e('[name="disabled"]')[0], disabled) | |
193 | self.assertIn(e('fieldset[disabled]')[0], disabled) | |
194 | self.assertIn(e('[name="legend-disabled"]')[0], disabled) | |
195 | self.assertIn(e('[name="fieldset-disabled"]')[0], disabled) | |
196 | self.assertIn(e('#disabled-optgroup')[0], disabled) | |
197 | self.assertIn(e('#disabled-from-optgroup')[0], disabled) | |
198 | self.assertIn(e('#disabled-option')[0], disabled) | |
199 | self.assertIn(e('#disabled-select')[0], disabled) | |
200 | ||
201 | assert len(disabled) == 8 | |
202 | assert len(e('select:enabled')) == 2 | |
203 | assert len(e('input:enabled')) == 11 | |
189 | 204 | assert len(e(':selected')) == 1 |
190 | 205 | assert len(e(':checked')) == 2 |
191 | 206 | assert len(e(':file')) == 1 |
192 | assert len(e(':input')) == 12 | |
207 | assert len(e(':input')) == 18 | |
193 | 208 | assert len(e(':button')) == 2 |
194 | 209 | assert len(e(':radio')) == 3 |
195 | 210 | assert len(e(':checkbox')) == 3 |
196 | 211 | |
197 | #test on other elements | |
212 | # test on other elements | |
198 | 213 | e = self.klass(self.html5) |
199 | 214 | assert len(e(":header")) == 6 |
200 | 215 | assert len(e(":parent")) == 2 |
201 | assert len(e(":empty")) == 6 | |
202 | assert len(e(":contains('Heading')")) == 6 | |
216 | assert len(e(":empty")) == 1 | |
217 | assert len(e(":contains('Heading')")) == 8 | |
203 | 218 | |
204 | 219 | def test_on_the_fly_dom_creation(self): |
205 | 220 | e = self.klass(self.html) |
219 | 234 | </html> |
220 | 235 | """ |
221 | 236 | |
237 | html2 = """ | |
238 | <html> | |
239 | <body> | |
240 | <dl> | |
241 | <dt id="term-1">term 1</dt> | |
242 | <dd>definition 1-a</dd> | |
243 | <dd>definition 1-b</dd> | |
244 | <dd>definition 1-c</dd> | |
245 | <dd>definition 1-d</dd> | |
246 | <dt id="term-2">term 2</dt> | |
247 | <dd>definition 2-a</dd> | |
248 | <dd class="strange">definition 2-b</dd> | |
249 | <dd>definition 2-c</dd> | |
250 | <dt id="term-3">term 3</dt> | |
251 | <dd>definition 3-a</dd> | |
252 | <dd>definition 3-b</dd> | |
253 | </dl> | |
254 | </body> | |
255 | </html> | |
256 | """ | |
257 | ||
222 | 258 | def test_filter(self): |
223 | 259 | assert len(self.klass('div', self.html).filter('.node3')) == 1 |
224 | 260 | assert len(self.klass('div', self.html).filter('#node2')) == 1 |
262 | 298 | self.html).closest('.node3').attr('id') == 'node2' |
263 | 299 | assert self.klass('.node3', self.html).closest('form') == [] |
264 | 300 | |
301 | def test_next_all(self): | |
302 | d = pq(self.html2) | |
303 | ||
304 | # without filter | |
305 | self.assertEqual( | |
306 | len(d('#term-2').next_all()), 6) | |
307 | # with filter | |
308 | self.assertEqual( | |
309 | len(d('#term-2').next_all('dd')), 5) | |
310 | # when empty | |
311 | self.assertEqual( | |
312 | d('#NOTHING').next_all(), []) | |
313 | ||
314 | def test_next_until(self): | |
315 | d = pq(self.html2) | |
316 | ||
317 | # without filter | |
318 | self.assertEqual( | |
319 | len(d('#term-2').next_until('dt')), 3) | |
320 | # with filter | |
321 | self.assertEqual( | |
322 | len(d('#term-2').next_until('dt', ':not(.strange)')), 2) | |
323 | # when empty | |
324 | self.assertEqual( | |
325 | d('#NOTHING').next_until('*'), []) | |
326 | ||
265 | 327 | |
266 | 328 | class TestOpener(TestCase): |
267 | 329 | |
279 | 341 | |
280 | 342 | doc = pq(url='http://example.com', opener=opener) |
281 | 343 | assert len(doc('.node')) == 1, doc |
344 | ||
345 | ||
class TestConstruction(TestCase):
    """Input validation of the PyQuery constructor."""

    def test_typeerror_on_invalid_value(self):
        # Arbitrary objects cannot be turned into a document.
        with self.assertRaises(TypeError):
            pq(object())
282 | 350 | |
283 | 351 | |
284 | 352 | class TestComment(TestCase): |
299 | 367 | |
300 | 368 | def test_S_this_inside_callback(self): |
301 | 369 | S = pq(self.html) |
302 | self.assertEqual(S('li').map(lambda i, el: S(this).html()), # NOQA | |
303 | ['Coffee', 'Tea', 'Milk']) | |
370 | self.assertEqual(S('li').map( | |
371 | lambda i, el: S(this).html()), # NOQA | |
372 | ['Coffee', 'Tea', 'Milk'] | |
373 | ) | |
304 | 374 | |
305 | 375 | def test_parameterless_callback(self): |
306 | 376 | S = pq(self.html) |
307 | self.assertEqual(S('li').map(lambda: S(this).html()), # NOQA | |
308 | ['Coffee', 'Tea', 'Milk']) | |
377 | self.assertEqual(S('li').map( | |
378 | lambda: S(this).html()), # NOQA | |
379 | ['Coffee', 'Tea', 'Milk'] | |
380 | ) | |
309 | 381 | |
310 | 382 | |
311 | 383 | class TestHook(TestCase): |
319 | 391 | |
320 | 392 | def test_fn(self): |
321 | 393 | "Example from `PyQuery.Fn` docs." |
322 | fn = lambda: this.map(lambda i, el: pq(this).outerHtml()) | |
394 | fn = lambda: this.map(lambda i, el: pq(this).outerHtml()) # NOQA | |
323 | 395 | pq.fn.listOuterHtml = fn |
324 | 396 | S = pq(self.html) |
325 | 397 | self.assertEqual(S('li').listOuterHtml(), |
327 | 399 | |
328 | 400 | def test_fn_with_kwargs(self): |
329 | 401 | "fn() with keyword arguments." |
330 | pq.fn.test = lambda p=1: pq(this).eq(p) | |
402 | pq.fn.test = lambda p=1: pq(this).eq(p) # NOQA | |
331 | 403 | S = pq(self.html) |
332 | 404 | self.assertEqual(S('li').test(0).text(), 'Coffee') |
333 | 405 | self.assertEqual(S('li').test().text(), 'Tea') |
334 | 406 | self.assertEqual(S('li').test(p=2).text(), 'Milk') |
335 | ||
336 | ||
337 | class TestAjaxSelector(TestSelector): | |
338 | klass = pqa | |
339 | ||
340 | def setUp(self): | |
341 | self.s = http.StopableWSGIServer.create(application) | |
342 | ||
343 | @not_py3k | |
344 | def test_proxy(self): | |
345 | self.s.wait() | |
346 | application_url = self.s.application_url | |
347 | e = self.klass([]) | |
348 | val = e.get(application_url) | |
349 | assert len(val('pre')) == 1, (str(val.response), val) | |
350 | ||
351 | def test_get(self): | |
352 | e = self.klass(app=application) | |
353 | val = e.get('/') | |
354 | assert len(val('pre')) == 1, val | |
355 | ||
356 | def test_secure_get(self): | |
357 | e = self.klass(app=secure_application) | |
358 | val = e.get('/', environ=dict(REMOTE_USER='gawii')) | |
359 | assert len(val('pre')) == 1, val | |
360 | val = e.get('/', REMOTE_USER='gawii') | |
361 | assert len(val('pre')) == 1, val | |
362 | ||
363 | def test_secure_get_not_authorized(self): | |
364 | e = self.klass(app=secure_application) | |
365 | val = e.get('/') | |
366 | assert len(val('pre')) == 0, val | |
367 | ||
368 | def test_post(self): | |
369 | e = self.klass(app=application) | |
370 | val = e.post('/') | |
371 | assert len(val('a')) == 1, val | |
372 | ||
373 | def test_subquery(self): | |
374 | e = self.klass(app=application) | |
375 | n = e('div') | |
376 | val = n.post('/') | |
377 | assert len(val('a')) == 1, val | |
378 | ||
379 | def tearDown(self): | |
380 | self.s.shutdown() | |
381 | 407 | |
382 | 408 | |
383 | 409 | class TestManipulating(TestCase): |
387 | 413 | <a href="/toto2"><img src ="myimage2" />My link text 2</a> |
388 | 414 | </div> |
389 | 415 | ''' |
416 | ||
417 | html2 = ''' | |
418 | <input name="spam" value="Spam"> | |
419 | <input name="eggs" value="Eggs"> | |
420 | <input type="checkbox" value="Bacon"> | |
421 | <input type="radio" value="Ham"> | |
422 | ''' | |
423 | ||
424 | html2_newline = ''' | |
425 | <input id="newline-text" type="text" name="order" value="S | |
426 | pam"> | |
427 | <input id="newline-radio" type="radio" name="order" value="S | |
428 | pam"> | |
429 | ''' | |
430 | ||
431 | html3 = ''' | |
432 | <textarea id="textarea-single">Spam</textarea> | |
433 | <textarea id="textarea-multi">Spam | |
434 | <b>Eggs</b> | |
435 | Bacon</textarea> | |
436 | ''' | |
437 | ||
438 | html4 = ''' | |
439 | <select id="first"> | |
440 | <option value="spam">Spam</option> | |
441 | <option value="eggs">Eggs</option> | |
442 | </select> | |
443 | <select id="second"> | |
444 | <option value="spam">Spam</option> | |
445 | <option value="eggs" selected>Eggs</option> | |
446 | <option value="bacon">Bacon</option> | |
447 | </select> | |
448 | <select id="third"> | |
449 | </select> | |
450 | <select id="fourth"> | |
451 | <option value="spam">Spam</option> | |
452 | <option value="spam">Eggs</option> | |
453 | <option value="spam">Bacon</option> | |
454 | </select> | |
455 | ''' | |
456 | ||
457 | html6 = ''' | |
458 | <select id="first" multiple> | |
459 | <option value="spam" selected>Spam</option> | |
460 | <option value="eggs" selected>Eggs</option> | |
461 | <option value="bacon">Bacon</option> | |
462 | </select> | |
463 | <select id="second" multiple> | |
464 | <option value="spam">Spam</option> | |
465 | <option value="eggs">Eggs</option> | |
466 | <option value="bacon">Bacon</option> | |
467 | </select> | |
468 | <select id="third" multiple> | |
469 | <option value="spam">Spam</option> | |
470 | <option value="spam">Eggs</option> | |
471 | <option value="spam">Bacon</option> | |
472 | </select> | |
473 | ''' | |
474 | ||
475 | html5 = ''' | |
476 | <div> | |
477 | <input id="first" value="spam"> | |
478 | <input id="second" value="eggs"> | |
479 | <textarea id="third">bacon</textarea> | |
480 | </div> | |
481 | ''' | |
482 | ||
483 | def test_attr_empty_string(self): | |
484 | d = pq('<div>') | |
485 | d.attr('value', '') | |
486 | self.assertEqual(d.outer_html(), '<div value=""></div>') | |
487 | self.assertEqual(d.outer_html(method="xml"), '<div value=""/>') | |
390 | 488 | |
391 | 489 | def test_remove(self): |
392 | 490 | d = pq(self.html) |
401 | 499 | d.removeClass('xx') |
402 | 500 | assert 'class' not in str(d), str(d) |
403 | 501 | |
    def test_val_for_inputs(self):
        # .val() reads, then sets, the value of text/checkbox/radio inputs.
        d = pq(self.html2)
        self.assertIsNone(d('input[name="none"]').val())
        self.assertEqual(d('input[name="spam"]').val(), 'Spam')
        self.assertEqual(d('input[name="eggs"]').val(), 'Eggs')
        self.assertEqual(d('input:checkbox').val(), 'Bacon')
        self.assertEqual(d('input:radio').val(), 'Ham')
        d('input[name="spam"]').val('42')
        d('input[name="eggs"]').val('43')
        d('input:checkbox').val('44')
        d('input:radio').val('45')
        self.assertEqual(d('input[name="spam"]').val(), '42')
        self.assertEqual(d('input[name="eggs"]').val(), '43')
        self.assertEqual(d('input:checkbox').val(), '44')
        self.assertEqual(d('input:radio').val(), '45')

    def test_val_for_inputs_with_newline(self):
        # Newlines embedded in a text input's value attribute are stripped;
        # non-text inputs keep them verbatim.
        d = pq(self.html2_newline)
        self.assertEqual(d('#newline-text').val(), 'Spam')
        self.assertEqual(d('#newline-radio').val(), 'S\npam')

    def test_val_for_textarea(self):
        # For <textarea>, .val() and .text() both expose the content,
        # including any markup inside it.
        d = pq(self.html3)
        self.assertEqual(d('#textarea-single').val(), 'Spam')
        self.assertEqual(d('#textarea-single').text(), 'Spam')
        d('#textarea-single').val('42')
        self.assertEqual(d('#textarea-single').val(), '42')
        # Note: jQuery still returns 'Spam' here.
        self.assertEqual(d('#textarea-single').text(), '42')

        multi_expected = '''Spam\n<b>Eggs</b>\nBacon'''
        self.assertEqual(d('#textarea-multi').val(), multi_expected)
        self.assertEqual(d('#textarea-multi').text(), multi_expected)
        multi_new = '''Bacon\n<b>Eggs</b>\nSpam'''
        d('#textarea-multi').val(multi_new)
        self.assertEqual(d('#textarea-multi').val(), multi_new)
        self.assertEqual(d('#textarea-multi').text(), multi_new)

    def test_val_for_select(self):
        # For single <select>, .val() follows the selected option; an empty
        # select yields None and setting a missing value leaves it unset.
        d = pq(self.html4)
        self.assertEqual(d('#first').val(), 'spam')
        self.assertEqual(d('#second').val(), 'eggs')
        self.assertIsNone(d('#third').val())
        d('#first').val('eggs')
        d('#second').val('bacon')
        d('#third').val('eggs')  # Selecting non-existing option.
        self.assertEqual(d('#first').val(), 'eggs')
        self.assertEqual(d('#second').val(), 'bacon')
        self.assertIsNone(d('#third').val())
        d('#first').val('bacon')  # Selecting non-existing option.
        self.assertEqual(d('#first').val(), 'spam')
        # Value set based on option order, not value order
        d('#second').val(['bacon', 'eggs'])
        self.assertEqual(d('#second').val(), 'eggs')
        d('#fourth').val(['spam'])
        self.assertEqual(d('#fourth').val(), 'spam')
        # Sets first option with matching value
        self.assertEqual(d('#fourth option[selected]').length, 1)
        self.assertEqual(d('#fourth option[selected]').text(), 'Spam')

    def test_val_for_select_multiple(self):
        # For <select multiple>, .val() returns a list of selected values.
        d = pq(self.html6)
        self.assertEqual(d('#first').val(), ['spam', 'eggs'])
        # Selecting non-existing option.
        d('#first').val(['eggs', 'sausage', 'bacon'])
        self.assertEqual(d('#first').val(), ['eggs', 'bacon'])
        self.assertEqual(d('#second').val(), [])
        d('#second').val('eggs')
        self.assertEqual(d('#second').val(), ['eggs'])
        d('#second').val(['not spam', 'not eggs'])
        self.assertEqual(d('#second').val(), [])
        d('#third').val(['spam'])
        self.assertEqual(d('#third').val(), ['spam', 'spam', 'spam'])

    def test_val_for_input_and_textarea_given_array_value(self):
        # A list value is joined with commas for non-select controls.
        d = pq('<input type="text">')
        d('input').val(['spam', 'eggs'])
        self.assertEqual(d('input').val(), 'spam,eggs')
        d = pq('<textarea></textarea>')
        d('textarea').val(['spam', 'eggs'])
        self.assertEqual(d('textarea').val(), 'spam,eggs')

    def test_val_for_multiple_elements(self):
        d = pq(self.html5)
        # "Get" returns *first* value.
        self.assertEqual(d('div > *').val(), 'spam')
        # "Set" updates *every* value.
        d('div > *').val('42')
        self.assertEqual(d('#first').val(), '42')
        self.assertEqual(d('#second').val(), '42')
        self.assertEqual(d('#third').val(), '42')

    def test_val_checkbox_no_value_attribute(self):
        # A checkbox without value="..." defaults to 'on'; an explicit
        # empty value stays empty.
        d = pq('<input type="checkbox">')
        self.assertEqual(d.val(), 'on')
        d = pq('<input type="checkbox" value="">')
        self.assertEqual(d.val(), '')

    def test_val_radio_no_value_attribute(self):
        # A radio button without value="..." also defaults to 'on'.
        d = pq('<input type="radio">')
        self.assertEqual(d.val(), 'on')

    def test_val_value_is_empty_string(self):
        d = pq('<input value="">')
        self.assertEqual(d.val(), '')

    def test_val_input_has_no_value_attr(self):
        # A missing value attribute reads as the empty string, not None.
        d = pq('<input>')
        self.assertEqual(d.val(), '')

    def test_html_replacement(self):
        # .html(new) on a child span replaces only that span's content.
        html = '<div>Not Me<span>Replace Me</span>Not Me</div>'
        replacement = 'New <em>Contents</em> New'
        expected = html.replace('Replace Me', replacement)

        d = pq(html)
        d.find('span').html(replacement)

        new_html = d.outerHtml()
        self.assertEqual(new_html, expected)
        self.assertIn(replacement, new_html)
623 | ||
624 | ||
625 | class TestAjax(TestCase): | |
626 | ||
627 | html = ''' | |
628 | <div id="div"> | |
629 | <input form="dispersed" name="order" value="spam"> | |
630 | </div> | |
631 | <form id="dispersed"> | |
632 | <div><input name="order" value="eggs"></div> | |
633 | <input form="dispersed" name="order" value="ham"> | |
634 | <input form="other-form" name="order" value="nothing"> | |
635 | <input form="" name="order" value="nothing"> | |
636 | </form> | |
637 | <form id="other-form"> | |
638 | <input form="dispersed" name="order" value="tomato"> | |
639 | </form> | |
640 | <form class="no-id"> | |
641 | <input form="dispersed" name="order" value="baked beans"> | |
642 | <input name="spam" value="Spam"> | |
643 | </form> | |
644 | ''' | |
645 | ||
646 | html2 = ''' | |
647 | <form id="first"> | |
648 | <input name="order" value="spam"> | |
649 | <fieldset> | |
650 | <input name="fieldset" value="eggs"> | |
651 | <input id="input" name="fieldset" value="ham"> | |
652 | </fieldset> | |
653 | </form> | |
654 | <form id="datalist"> | |
655 | <datalist><div><input name="datalist" value="eggs"></div></datalist> | |
656 | <input type="checkbox" name="checkbox" checked> | |
657 | <input type="radio" name="radio" checked> | |
658 | </form> | |
659 | ''' | |
660 | ||
661 | html3 = ''' | |
662 | <form> | |
663 | <input name="order" value="spam"> | |
664 | <input id="noname" value="sausage"> | |
665 | <fieldset disabled> | |
666 | <input name="order" value="sausage"> | |
667 | </fieldset> | |
668 | <input name="disabled" value="ham" disabled> | |
669 | <input type="submit" name="submit" value="Submit"> | |
670 | <input type="button" name="button" value=""> | |
671 | <input type="image" name="image" value=""> | |
672 | <input type="reset" name="reset" value="Reset"> | |
673 | <input type="file" name="file" value=""> | |
674 | <button type="submit" name="submit" value="submit"></button> | |
675 | <input type="checkbox" name="spam"> | |
676 | <input type="radio" name="eggs"> | |
677 | </form> | |
678 | ''' | |
679 | ||
680 | html4 = ''' | |
681 | <form> | |
682 | <input name="spam" value="Spam/ | |
683 | spam"> | |
684 | <select name="order" multiple> | |
685 | <option value="baked | |
686 | beans" selected> | |
687 | <option value="tomato" selected> | |
688 | <option value="spam"> | |
689 | </select> | |
690 | <textarea name="multiline">multiple | |
691 | lines | |
692 | of text</textarea> | |
693 | </form> | |
694 | ''' | |
695 | ||
    def test_serialize_pairs_form_id(self):
        # Controls bound to a form through the form="..." attribute are
        # collected with it, wherever they live in the document.
        d = pq(self.html)
        self.assertEqual(d('#div').serialize_pairs(), [])
        self.assertEqual(d('#dispersed').serialize_pairs(), [
            ('order', 'spam'), ('order', 'eggs'), ('order', 'ham'),
            ('order', 'tomato'), ('order', 'baked beans'),
        ])
        self.assertEqual(d('.no-id').serialize_pairs(), [
            ('spam', 'Spam'),
        ])

    def test_serialize_pairs_form_controls(self):
        # Serializing a fieldset (or a mixed selection) walks its nested
        # controls; overlapping selections repeat the shared pairs.
        d = pq(self.html2)
        self.assertEqual(d('fieldset').serialize_pairs(), [
            ('fieldset', 'eggs'), ('fieldset', 'ham'),
        ])
        self.assertEqual(d('#input, fieldset, #first').serialize_pairs(), [
            ('order', 'spam'), ('fieldset', 'eggs'), ('fieldset', 'ham'),
            ('fieldset', 'eggs'), ('fieldset', 'ham'), ('fieldset', 'ham'),
        ])
        self.assertEqual(d('#datalist').serialize_pairs(), [
            ('datalist', 'eggs'), ('checkbox', 'on'), ('radio', 'on'),
        ])

    def test_serialize_pairs_filter_controls(self):
        # Disabled, unnamed, and button/file-type controls are excluded;
        # only the plain named input survives.
        d = pq(self.html3)
        self.assertEqual(d('form').serialize_pairs(), [
            ('order', 'spam')
        ])

    def test_serialize_pairs_form_values(self):
        # Newlines in submitted values come back normalized to CRLF.
        d = pq(self.html4)
        self.assertEqual(d('form').serialize_pairs(), [
            ('spam', 'Spam/spam'), ('order', 'baked\r\nbeans'),
            ('order', 'tomato'), ('multiline', 'multiple\r\nlines\r\nof text'),
        ])

    def test_serialize_array(self):
        # Same data as serialize_pairs, as jQuery-style name/value dicts.
        d = pq(self.html4)
        self.assertEqual(d('form').serialize_array(), [
            {'name': 'spam', 'value': 'Spam/spam'},
            {'name': 'order', 'value': 'baked\r\nbeans'},
            {'name': 'order', 'value': 'tomato'},
            {'name': 'multiline', 'value': 'multiple\r\nlines\r\nof text'},
        ])

    def test_serialize(self):
        # URL-encoded form body; '/' and CRLF are percent-escaped.
        d = pq(self.html4)
        self.assertEqual(
            d('form').serialize(),
            'spam=Spam%2Fspam&order=baked%0D%0Abeans&order=tomato&'
            'multiline=multiple%0D%0Alines%0D%0Aof%20text'
        )

    def test_serialize_dict(self):
        # Dict form: names with several values collapse into a list.
        d = pq(self.html4)
        self.assertEqual(d('form').serialize_dict(), {
            'spam': 'Spam/spam',
            'order': ['baked\r\nbeans', 'tomato'],
            'multiline': 'multiple\r\nlines\r\nof text',
        })
757 | ||
404 | 758 | |
405 | 759 | class TestMakeLinks(TestCase): |
406 | 760 | |
434 | 788 | self.assertRaises(etree.XMLSyntaxError, lambda: d.after(self.html)) |
435 | 789 | d = pq(self.xml, parser='html') |
436 | 790 | d.after(self.html) # this should not fail |
437 | ||
438 | @not_py3k | |
439 | def test_soup_parser(self): | |
440 | d = pq('<meta><head><title>Hello</head><body onload=crash()>Hi all<p>', | |
441 | parser='soup') | |
442 | self.assertEqual(str(d), ( | |
443 | '<html><meta/><head><title>Hello</title></head>' | |
444 | '<body onload="crash()">Hi all<p/></body></html>')) | |
445 | 791 | |
446 | 792 | def test_replaceWith(self): |
447 | 793 | expected = '''<div class="portlet"> |
471 | 817 | <foo xmlns:bar="http://example.com/bar"> |
472 | 818 | <bar:blah>What</bar:blah> |
473 | 819 | <idiot>123</idiot> |
820 | <baz xmlns="http://example.com/baz" a="b"> | |
821 | <subbaz/> | |
822 | </baz> | |
474 | 823 | </foo>''' |
475 | 824 | |
476 | 825 | xhtml = ''' |
480 | 829 | </body> |
481 | 830 | </html>''' |
482 | 831 | |
832 | namespaces = {'bar': 'http://example.com/bar', | |
833 | 'baz': 'http://example.com/baz'} | |
834 | ||
483 | 835 | def test_selector(self): |
484 | 836 | expected = 'What' |
485 | d = pq(b(self.xml), parser='xml') | |
837 | d = pq(self.xml.encode('utf8'), parser='xml') | |
486 | 838 | val = d('bar|blah', |
487 | namespaces={'bar': 'http://example.com/bar'}).text() | |
839 | namespaces=self.namespaces).text() | |
488 | 840 | self.assertEqual(repr(val), repr(expected)) |
489 | 841 | |
490 | 842 | def test_selector_with_xml(self): |
491 | 843 | expected = 'What' |
492 | d = pq('bar|blah', b(self.xml), parser='xml', | |
493 | namespaces={'bar': 'http://example.com/bar'}) | |
844 | d = pq('bar|blah', self.xml.encode('utf8'), parser='xml', | |
845 | namespaces=self.namespaces) | |
494 | 846 | val = d.text() |
495 | 847 | self.assertEqual(repr(val), repr(expected)) |
496 | 848 | |
502 | 854 | |
503 | 855 | def test_xhtml_namespace(self): |
504 | 856 | expected = 'What' |
505 | d = pq(b(self.xhtml), parser='xml') | |
857 | d = pq(self.xhtml.encode('utf8'), parser='xml') | |
506 | 858 | d.xhtml_to_html() |
507 | 859 | val = d('div').text() |
508 | 860 | self.assertEqual(repr(val), repr(expected)) |
516 | 868 | |
517 | 869 | def test_remove_namespaces(self): |
518 | 870 | expected = 'What' |
519 | d = pq(b(self.xml), parser='xml').remove_namespaces() | |
871 | d = pq(self.xml.encode('utf8'), parser='xml').remove_namespaces() | |
520 | 872 | val = d('blah').text() |
521 | 873 | self.assertEqual(repr(val), repr(expected)) |
874 | ||
875 | def test_persistent_namespaces(self): | |
876 | d = pq(self.xml.encode('utf8'), parser='xml', | |
877 | namespaces=self.namespaces) | |
878 | val = d('bar|blah').text() | |
879 | self.assertEqual(repr(val), repr('What')) | |
880 | ||
881 | def test_namespace_traversal(self): | |
882 | d = pq(self.xml.encode('utf8'), parser='xml', | |
883 | namespaces=self.namespaces) | |
884 | val = d('baz|subbaz').closest('baz|baz').attr('a') | |
885 | self.assertEqual(repr(val), repr('b')) | |
522 | 886 | |
523 | 887 | |
524 | 888 | class TestWebScrapping(TestCase): |
541 | 905 | self.assertIn('REQUEST_METHOD: POST', d('p').text()) |
542 | 906 | self.assertIn('q=foo', d('p').text()) |
543 | 907 | |
908 | def test_session(self): | |
909 | if HAS_REQUEST: | |
910 | import requests | |
911 | session = requests.Session() | |
912 | session.headers.update({'X-FOO': 'bar'}) | |
913 | d = pq(self.application_url, {'q': 'foo'}, | |
914 | method='get', session=session) | |
915 | self.assertIn('HTTP_X_FOO: bar', d('p').text()) | |
916 | else: | |
917 | self.skipTest('no requests library') | |
918 | ||
544 | 919 | def tearDown(self): |
545 | 920 | self.s.shutdown() |
546 | 921 | |
548 | 923 | class TestWebScrappingEncoding(TestCase): |
549 | 924 | |
550 | 925 | def test_get(self): |
551 | if not HAS_REQUEST: | |
552 | return | |
553 | d = pq(u('http://ru.wikipedia.org/wiki/Заглавная_страница', 'utf8'), | |
926 | d = pq(u'http://ru.wikipedia.org/wiki/Заглавная_страница', | |
554 | 927 | method='get') |
555 | 928 | print(d) |
556 | self.assertEqual(d('#n-mainpage a').text(), | |
557 | u('Заглавная страница', 'utf8')) | |
929 | self.assertEqual(d('#pt-login').text(), u'Войти') | |
930 | ||
931 | ||
class TestWebScrappingTimeouts(TestCase):
    """Check that the ``timeout`` argument aborts slow fetches."""

    def setUp(self):
        # WSGI app that stalls for two seconds before answering.
        def slow_app(environ, start_response):
            start_response('200 OK', [('Content-Type', 'text/plain')])
            time.sleep(2)
            return [b'foobar\n']
        self.s = http.StopableWSGIServer.create(slow_app)
        self.s.wait()
        self.application_url = self.s.application_url.rstrip('/')

    def test_get(self):
        # Without a timeout the slow response is tolerated.
        pq(self.application_url)
        # A one-second timeout must trip before the two-second sleep ends.
        with self.assertRaises(Exception):
            pq(self.application_url, timeout=1)

    def tearDown(self):
        self.s.shutdown()
0 | import os | |
1 | import unittest | |
2 | from threading import Thread | |
3 | from time import sleep | |
4 | ||
5 | from .browser_base import TextExtractionMixin | |
6 | ||
7 | SELENIUM = 'MOZ_HEADLESS' in os.environ | |
8 | ||
9 | try: | |
10 | from selenium import webdriver | |
11 | from selenium.webdriver.firefox.options import Options | |
12 | except ImportError: | |
13 | SELENIUM = False | |
14 | ||
15 | if SELENIUM: | |
16 | from urllib.parse import urlunsplit | |
17 | from http.server import HTTPServer, BaseHTTPRequestHandler | |
18 | from queue import Queue | |
19 | ||
class BaseTestRequestHandler(BaseHTTPRequestHandler):
    """HTTP handler wired to the test suite through queues on the server.

    The server instance is expected to carry ``in_queue`` (suite -> handler)
    and ``out_queue`` (handler -> suite).
    """

    # Most recently drained value from ``server.html_queue``.
    # NOTE(review): only in_queue/out_queue are assigned by BaseBrowserTest;
    # confirm whether html_queue/_get_last_html are still used anywhere.
    _last_html = ''

    def _get_last_html(self):
        """Drain ``server.html_queue`` and return the newest entry seen."""
        queue = self.server.html_queue
        while not queue.empty():
            self._last_html = queue.get_nowait()
        return self._last_html

    def log_request(self, code='-', size='-'):
        """Silence per-request logging to keep test output clean."""

    def recv_from_testsuite(self, non_blocking=False):
        """Fetch one value from the suite; None when empty and non-blocking."""
        queue = self.server.in_queue
        if not non_blocking:
            return queue.get()
        if queue.empty():
            return None
        return queue.get_nowait()

    def send_to_testsuite(self, value):
        """Hand *value* back to the test suite."""
        self.server.out_queue.put(value)
40 | ||
class HTMLSnippetSender(BaseTestRequestHandler):
    """Serve the most recent HTML snippet pushed by the test suite at "/"."""

    # Last snippet received from the test suite.  Must be text: do_GET
    # encodes it with .encode('utf-8'), so the previous bytes default
    # (b'') raised AttributeError on any GET arriving before the suite
    # had sent a snippet.  The suite sends str snippets, and the base
    # class default (_last_html) is also str.
    last_html = ''

    def get_last_html(self):
        """Drain pending snippets; return the newest (or the previous one)."""
        while True:
            value = self.recv_from_testsuite(non_blocking=True)
            if value is None:
                break
            self.last_html = value
        return self.last_html

    def do_GET(self):
        """Answer "/" with the current snippet as UTF-8 HTML; 404 otherwise."""
        if self.path == '/':
            self.send_response(200)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.end_headers()
            self.wfile.write(self.get_last_html().encode('utf-8'))
        else:
            self.send_response(404)
            self.end_headers()
61 | ||
class BaseBrowserTest(unittest.TestCase):
    """Base class pairing a local HTTP server with a headless Firefox."""

    LOCAL_IP = '127.0.0.1'
    PORT = 28546
    # Descendant of BaseTestRequestHandler mounted on the HTTP server.
    REQUEST_HANDLER_CLASS = None

    @classmethod
    def setUpClass(cls):
        # Queues linking the suite to the request handler.
        cls.to_server_queue = Queue()
        cls.from_server_queue = Queue()
        cls.server = HTTPServer((cls.LOCAL_IP, cls.PORT),
                                cls.REQUEST_HANDLER_CLASS)
        cls.server.in_queue = cls.to_server_queue
        cls.server.out_queue = cls.from_server_queue
        # Daemon thread so a failing test run cannot hang on shutdown.
        cls.server_thread = Thread(target=cls.server.serve_forever)
        cls.server_thread.daemon = True
        cls.server_thread.start()
        options = Options()
        options.add_argument('-headless')
        cls.driver = webdriver.Firefox(options=options)
        sleep(1)  # give the server and the browser a moment to come up

    @classmethod
    def tearDownClass(cls):
        cls.driver.quit()
        cls.server.shutdown()
        cls.server.server_close()

    def send_to_server(self, value):
        """Push *value* to the request handler."""
        self.to_server_queue.put(value)

    def recv_from_server(self, non_blocking=False):
        """Pull one value from the handler; None when empty and non-blocking."""
        queue = self.from_server_queue
        if not non_blocking:
            return queue.get()
        if queue.empty():
            return None
        return queue.get_nowait()

    def open_url(self, path):
        """Point the browser at *path* on the local test server."""
        netloc = '{}:{}'.format(self.LOCAL_IP, self.PORT)
        self.driver.get(urlunsplit(('http', netloc, path, '', '')))
103 | ||
104 | class TestInnerText(BaseBrowserTest, TextExtractionMixin): | |
105 | REQUEST_HANDLER_CLASS = HTMLSnippetSender | |
106 | ||
107 | def _simple_test(self, html, expected_sq, expected_nosq, **kwargs): | |
108 | self.send_to_server(html) | |
109 | self.open_url('/') | |
110 | ||
111 | selenium_text = self.driver.find_element_by_tag_name('body').text | |
112 | self.assertEqual(selenium_text, expected_sq) | |
113 | ||
114 | # inner_text = self.driver.execute_script( | |
115 | # 'return document.body.innerText') | |
116 | # text_content = self.driver.execute_script( | |
117 | # 'return document.body.textContent') |
0 | 0 | [tox] |
1 | envlist=py26,py27,py33,py34 | |
1 | envlist=py35,py36,py37,py38 | |
2 | 2 | |
3 | 3 | [testenv] |
4 | whitelist_externals= | |
5 | rm | |
6 | passenv= | |
7 | MOZ_HEADLESS | |
4 | 8 | commands = |
5 | {envbindir}/nosetests [] | |
9 | pytest [] | |
6 | 10 | deps = |
7 | cssselect>0.7.9 | |
8 | requests | |
9 | WebOb>1.1.9 | |
10 | WebTest | |
11 | nose | |
12 | coverage | |
13 | unittest2 | |
14 | BeautifulSoup | |
15 | restkit | |
11 | py38: selenium | |
12 | -e .[test] | |
16 | 13 | |
17 | [testenv:py33] | |
18 | changedir={toxinidir} | |
14 | [testenv:flake8] | |
15 | skipsdist=true | |
16 | skip_install=true | |
17 | basepython = python3.8 | |
19 | 18 | commands = |
20 | {envbindir}/nosetests [] | |
19 | flake8 pyquery tests | |
21 | 20 | deps = |
22 | cssselect>0.7.9 | |
23 | requests | |
24 | WebOb>1.1.9 | |
25 | WebTest | |
26 | nose | |
27 | coverage | |
21 | flake8 | |
28 | 22 | |
29 | [testenv:py34] | |
30 | changedir={toxinidir} | |
23 | [testenv:docs] | |
24 | skip_install=false | |
25 | skipsdist=true | |
26 | basepython = python3.8 | |
27 | changedir = docs | |
28 | deps = | |
29 | sphinx | |
30 | Pygments | |
31 | 31 | commands = |
32 | {envbindir}/nosetests [] | |
33 | deps = | |
34 | cssselect>0.7.9 | |
35 | requests | |
36 | WebOb>1.1.9 | |
37 | WebTest | |
38 | nose | |
39 | coverage | |
32 | rm -Rf {envtmpdir}/doctrees {envtmpdir}/html | |
33 | sphinx-build -b html -d {envtmpdir}/doctrees . {envtmpdir}/html | |
34 | ||
35 | # [testenv:selenium] | |
36 | # basepython = python3.5 | |
37 | # deps = | |
38 | # selenium | |
39 | # commands = | |
40 | # {envbindir}/python -m unittest seleniumtests.offline | |
41 | # {envbindir}/python -m unittest seleniumtests.browser |