Import upstream version 1.4.3+git20220717.1.ce99700
Debian Janitor
1 year, 7 months ago
0 | 2.0.0 (unreleased) | |
1 | ------------------ | |
2 | ||
3 | - Breaking change: inputs starting with ``"http://"`` or ``"https://"`` like | |
4 | ``PyQuery("http://example.com")`` will no longer fetch the contents of the URL. | |
5 | Users desiring the old behavior should switch to ``PyQuery(url="http://example.com")``. | |
6 | ||
7 | - Add nextUntil method | |
8 | ||
9 | - ``.remove()`` no longer inserts a space in place of the removed element | |
10 | ||
11 | - Fix escaping of top-level element text in ``.html()`` output | |
12 | ||
13 | ||
14 | 1.4.3 (2020-11-21) | |
15 | ------------------ | |
16 | ||
17 | - No longer use a universal wheel | |
18 | ||
19 | ||
20 | 1.4.2 (2020-11-21) | |
21 | ------------------ | |
22 | ||
23 | - Fix exception raised when calling `PyQuery("<textarea></textarea>").text()` | |
24 | ||
25 | - python2 is no longer supported | |
26 | ||
27 | 1.4.1 (2019-10-26) | |
28 | ------------------ | |
29 | ||
30 | - This is the latest release with py2 support | |
31 | ||
32 | - Remove py33, py34 support | |
33 | ||
34 | - web scraping improvements: default timeout and session support | |
35 | ||
36 | - Add API methods to serialize form-related elements according to spec | |
37 | ||
38 | - Include HTML markup when querying textarea text/value | |
39 | ||
40 | ||
41 | 1.4.0 (2018-01-11) | |
42 | ------------------ | |
43 | ||
44 | - Refactoring of `.text()` to match firefox behavior. | |
45 | ||
46 | ||
47 | 1.3.0 (2017-10-21) | |
48 | ------------------ | |
49 | ||
50 | - Remove some unmaintained modules: ``pyquery.ajax`` and ``pyquery.rules`` | |
51 | ||
52 | - Code cleanup. No longer use ugly hacks required by python2.6/python3.2. | |
53 | ||
54 | - Run tests with python3.6 on CI | |
55 | ||
56 | - Add a ``method`` argument to ``.outer_html()`` | |
57 | ||
58 | ||
59 | 1.2.17 (2016-10-14) | |
60 | ------------------- | |
61 | ||
62 | - ``PyQuery('<input value="">').val()`` is ``''`` | |
63 | - ``PyQuery('<input>').val()`` is ``''`` | |
64 | ||
65 | ||
66 | 1.2.16 (2016-10-14) | |
67 | ------------------- | |
68 | ||
69 | - ``.attr('value', '')`` no longer removes the ``value`` attribute | |
70 | ||
71 | - ``<input type="checkbox">`` without ``value="..."`` have a ``.val()`` of | |
72 | ``'on'`` | |
73 | ||
74 | - ``<input type="radio">`` without ``value="..."`` have a ``.val()`` of | |
75 | ``'on'`` | |
76 | ||
77 | - ``<select>`` without ``<option selected>`` have the value of their first | |
78 | ``<option>`` (or ``None`` if there are no options) | |
79 | ||
80 | ||
81 | 1.2.15 (2016-10-11) | |
82 | ------------------- | |
83 | ||
84 | - .val() should never raise | |
85 | ||
86 | - drop py26 support | |
87 | ||
88 | - improve .extend() by returning self | |
89 | ||
90 | ||
91 | 1.2.14 (2016-10-10) | |
92 | ------------------- | |
93 | ||
94 | - fix val() for <textarea> and <select>, to match jQuery behavior | |
95 | ||
96 | ||
97 | 1.2.13 (2016-04-12) | |
98 | ------------------- | |
99 | ||
100 | - Note explicit support for Python 3.5 | |
101 | ||
102 | 1.2.12 (2016-04-12) | |
103 | ------------------- | |
104 | ||
105 | - make_links_absolute now take care of whitespaces | |
106 | ||
107 | - added pseudo selector :has() | |
108 | ||
109 | - add cookies arguments as allowed arguments for requests | |
110 | ||
111 | ||
112 | 1.2.11 (2016-02-02) | |
113 | ------------------- | |
114 | ||
115 | - Preserve namespaces attribute on PyQuery copies. | |
116 | ||
117 | - Do not raise an error when the http response code is 2XX | |
118 | ||
119 | 1.2.10 (2016-01-05) | |
120 | ------------------- | |
121 | ||
122 | - Fixed #118: implemented usage ``lxml.etree.tostring`` within ``outer_html`` method | |
123 | ||
124 | - Fixed #117: Raise HTTP Error if HTTP status code is not equal to 200 | |
125 | ||
126 | - Fixed #112: make_links_absolute does not apply to form actions | |
127 | ||
128 | - Fixed #98: contains act like jQuery | |
129 | ||
130 | ||
0 | 131 | 1.2.9 (2014-08-22) |
1 | 132 | ------------------ |
2 | 133 | |
39 | 170 | 1.2.6 (2013-10-11) |
40 | 171 | ------------------ |
41 | 172 | |
42 | README_fixt.py was not include in the release. Fix #54. | |
173 | - README_fixt.py was not included in the release. Fix #54. | |
43 | 174 | |
44 | 175 | |
45 | 176 | 1.2.5 (2013-10-10) |
46 | 177 | ------------------ |
47 | 178 | |
48 | cssselect compat. See https://github.com/SimonSapin/cssselect/pull/22 | |
49 | ||
50 | tests improvments. no longer require a eth connection. | |
51 | ||
52 | fix #55 | |
179 | - cssselect compat. See https://github.com/SimonSapin/cssselect/pull/22 | |
180 | ||
181 | - tests improvements. no longer require a network connection. | |
182 | ||
183 | - fix #55 | |
53 | 184 | |
54 | 185 | 1.2.4 |
55 | 186 | ----- |
56 | 187 | |
57 | Moved to github. So a few files are renamed from .txt to .rst | |
58 | ||
59 | Added .xhtml_to_html() and .remove_namespaces() | |
60 | ||
61 | Use requests to fetch urls (if available) | |
62 | ||
63 | Use restkit's proxy instead of Paste (which will die with py3) | |
64 | ||
65 | Allow to open https urls | |
66 | ||
67 | python2.5 is no longer supported (may work, but tests are broken) | |
188 | - Moved to github. So a few files are renamed from .txt to .rst | |
189 | ||
190 | - Added .xhtml_to_html() and .remove_namespaces() | |
191 | ||
192 | - Use requests to fetch urls (if available) | |
193 | ||
194 | - Use restkit's proxy instead of Paste (which will die with py3) | |
195 | ||
196 | - Allow to open https urls | |
197 | ||
198 | - python2.5 is no longer supported (may work, but tests are broken) | |
68 | 199 | |
69 | 200 | 1.2.3 |
70 | 201 | ----- |
71 | 202 | |
72 | Allow to pass this in .filter() callback | |
73 | ||
74 | Add .contents() .items() | |
75 | ||
76 | Add tox.ini | |
77 | ||
78 | Bug fixes: fix #35 #55 #64 #66 | |
203 | - Allow to pass this in .filter() callback | |
204 | ||
205 | - Add .contents() .items() | |
206 | ||
207 | - Add tox.ini | |
208 | ||
209 | - Bug fixes: fix #35 #55 #64 #66 | |
79 | 210 | |
80 | 211 | 1.2.2 |
81 | 212 | ----- |
82 | 213 | |
83 | Fix cssselectpatch to match the newer implementation of cssselect. Fixes issue #62, #52 and #59 (Haoyu Bai) | |
84 | ||
85 | Fix issue #37 (Caleb Burns) | |
214 | - Fix cssselectpatch to match the newer implementation of cssselect. Fixes issue #62, #52 and #59 (Haoyu Bai) | |
215 | ||
216 | - Fix issue #37 (Caleb Burns) | |
86 | 217 | |
87 | 218 | 1.2.1 |
88 | 219 | ----- |
89 | 220 | |
90 | Allow to use a custom css translator. | |
91 | ||
92 | Fix issue 44: case problem with xml documents | |
221 | - Allow to use a custom css translator. | |
222 | ||
223 | - Fix issue 44: case problem with xml documents | |
93 | 224 | |
94 | 225 | 1.2 |
95 | 226 | --- |
96 | 227 | |
97 | PyQuery now use `cssselect <http://pypi.python.org/pypi/cssselect>`_. See issue | |
98 | 43. | |
99 | ||
100 | Fix issue 40: forward .html() extra arguments to ``lxml.etree.tostring`` | |
228 | - PyQuery now uses `cssselect <http://pypi.python.org/pypi/cssselect>`_. See issue 43. | |
229 | ||
230 | - Fix issue 40: forward .html() extra arguments to ``lxml.etree.tostring`` | |
101 | 231 | |
102 | 232 | 1.1.1 |
103 | 233 | ----- |
104 | 234 | |
105 | Minor release. Include test file so you can run tests from the tarball. | |
235 | - Minor release. Include test file so you can run tests from the tarball. | |
106 | 236 | |
107 | 237 | |
108 | 238 | 1.1 |
109 | 239 | --- |
110 | 240 | |
111 | fix issues 30, 31, 32 - py3 improvements / webob 1.2+ support | |
241 | - fix issues 30, 31, 32 - py3 improvements / webob 1.2+ support | |
112 | 242 | |
113 | 243 | |
114 | 244 | 1.0 |
115 | 245 | --- |
116 | 246 | |
117 | fix issues 24 | |
247 | - fix issues 24 | |
118 | 248 | |
119 | 249 | 0.7 |
120 | 250 | --- |
121 | 251 | |
122 | Python 3 compatible | |
123 | ||
124 | Add __unicode__ method | |
125 | ||
126 | Add root and encoding attribute | |
127 | ||
128 | fix issues 19, 20, 22, 23 | |
252 | - Python 3 compatible | |
253 | ||
254 | - Add __unicode__ method | |
255 | ||
256 | - Add root and encoding attribute | |
257 | ||
258 | - fix issues 19, 20, 22, 23 | |
129 | 259 | |
130 | 260 | 0.6.1 |
131 | 261 | ------ |
132 | 262 | |
133 | Move README.txt at package root | |
134 | ||
135 | Add CHANGES.txt and add it to long_description | |
263 | - Move README.txt at package root | |
264 | ||
265 | - Add CHANGES.txt and add it to long_description | |
136 | 266 | |
137 | 267 | 0.6 |
138 | 268 | ---- |
139 | 269 | |
140 | Added PyQuery.outerHtml | |
141 | ||
142 | Added PyQuery.fn | |
143 | ||
144 | Added PyQuery.map | |
145 | ||
146 | Change PyQuery.each behavior to reflect jQuery api | |
147 | ||
148 | ||
270 | - Added PyQuery.outerHtml | |
271 | ||
272 | - Added PyQuery.fn | |
273 | ||
274 | - Added PyQuery.map | |
275 | ||
276 | - Change PyQuery.each behavior to reflect jQuery api | |
277 | ||
278 |
0 | Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com> | |
1 | ||
2 | Redistribution and use in source and binary forms, with or without | |
3 | modification, are permitted provided that the following conditions are | |
4 | met: | |
5 | ||
6 | 1. Redistributions of source code must retain the above copyright | |
7 | notice, this list of conditions and the following disclaimer. | |
8 | ||
9 | 2. Redistributions in binary form must reproduce the above copyright | |
10 | notice, this list of conditions and the following disclaimer in | |
11 | the documentation and/or other materials provided with the | |
12 | distribution. | |
13 | ||
14 | 3. Neither the name of Infrae nor the names of its contributors may | |
15 | be used to endorse or promote products derived from this software | |
16 | without specific prior written permission. | |
17 | ||
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INFRAE OR | |
22 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
23 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
24 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
25 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
26 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
27 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
1 | 1 | prune docs/_build |
2 | 2 | graft pyquery |
3 | 3 | graft tests |
4 | include *.py | |
5 | include *.txt | |
4 | 6 | include *_fixt.py *.rst *.cfg *.ini |
5 | 7 | global-exclude *.pyc |
6 | 8 | global-exclude __pycache__ |
0 | Metadata-Version: 1.1 | |
0 | Metadata-Version: 2.1 | |
1 | 1 | Name: pyquery |
2 | Version: 1.2.9 | |
2 | Version: 2.0.0.dev0 | |
3 | 3 | Summary: A jquery-like library for python |
4 | 4 | Home-page: https://github.com/gawel/pyquery |
5 | Author: Gael Pasgrimaud | |
6 | Author-email: gael@gawel.org | |
5 | Author: Olivier Lauzanne | |
6 | Author-email: olauzanne@gmail.com | |
7 | Maintainer: Gael Pasgrimaud | |
8 | Maintainer-email: gael@gawel.org | |
7 | 9 | License: BSD |
8 | Description: | |
9 | pyquery: a jquery-like library for python | |
10 | ========================================= | |
11 | ||
12 | pyquery allows you to make jquery queries on xml documents. | |
13 | The API is as much as possible the similar to jquery. pyquery uses lxml for fast | |
14 | xml and html manipulation. | |
15 | ||
16 | This is not (or at least not yet) a library to produce or interact with | |
17 | javascript code. I just liked the jquery API and I missed it in python so I | |
18 | told myself "Hey let's make jquery in python". This is the result. | |
19 | ||
20 | The `project`_ is being actively developped on a git repository on Github. I | |
21 | have the policy of giving push access to anyone who wants it and then to review | |
22 | what he does. So if you want to contribute just email me. | |
23 | ||
24 | Please report bugs on the `github | |
25 | <https://github.com/gawel/pyquery/issues>`_ issue | |
26 | tracker. | |
27 | ||
28 | .. _deliverance: http://www.gawel.org/weblog/en/2008/12/skinning-with-pyquery-and-deliverance | |
29 | .. _project: https://github.com/gawel/pyquery/ | |
30 | ||
31 | Quickstart | |
32 | ========== | |
33 | ||
34 | You can use the PyQuery class to load an xml document from a string, a lxml | |
35 | document, from a file or from an url:: | |
36 | ||
37 | >>> from pyquery import PyQuery as pq | |
38 | >>> from lxml import etree | |
39 | >>> import urllib | |
40 | >>> d = pq("<html></html>") | |
41 | >>> d = pq(etree.fromstring("<html></html>")) | |
42 | >>> d = pq(url=your_url) | |
43 | >>> d = pq(url=your_url, | |
44 | ... opener=lambda url, **kw: urlopen(url).read()) | |
45 | >>> d = pq(filename=path_to_html_file) | |
46 | ||
47 | Now d is like the $ in jquery:: | |
48 | ||
49 | >>> d("#hello") | |
50 | [<p#hello.hello>] | |
51 | >>> p = d("#hello") | |
52 | >>> print(p.html()) | |
53 | Hello world ! | |
54 | >>> p.html("you know <a href='http://python.org/'>Python</a> rocks") | |
55 | [<p#hello.hello>] | |
56 | >>> print(p.html()) | |
57 | you know <a href="http://python.org/">Python</a> rocks | |
58 | >>> print(p.text()) | |
59 | you know Python rocks | |
60 | ||
61 | You can use some of the pseudo classes that are available in jQuery but that | |
62 | are not standard in css such as :first :last :even :odd :eq :lt :gt :checked | |
63 | :selected :file:: | |
64 | ||
65 | >>> d('p:first') | |
66 | [<p#hello.hello>] | |
67 | ||
68 | ||
69 | ||
70 | See http://pyquery.rtfd.org/ for the full documentation | |
71 | ||
72 | News | |
73 | ==== | |
74 | ||
75 | 1.2.9 (2014-08-22) | |
76 | ------------------ | |
77 | ||
78 | - Support for keyword arguments in PyQuery custom functions | |
79 | ||
80 | - Fixed #78: items must take care or the parent | |
81 | ||
82 | - Fixed #65 PyQuery.make_links_absolute() no longer creates 'href' attribute | |
83 | when it isn't there | |
84 | ||
85 | - Fixed #19. ``is_()`` was broken. | |
86 | ||
87 | - Fixed #9. ``.replaceWith(PyQuery element)`` raises error | |
88 | ||
89 | - Remove official python3.2 support (mostly because of 3rd party semi-deps) | |
90 | ||
91 | ||
92 | 1.2.8 (2013-12-21) | |
93 | ------------------ | |
94 | ||
95 | - Fixed #22: Open by filename fails when file contains invalid xml | |
96 | ||
97 | - Bug fix in .remove_class() | |
98 | ||
99 | ||
100 | 1.2.7 (2013-12-21) | |
101 | ------------------ | |
102 | ||
103 | - Use pep8 name for methods but keep an alias for camel case method. | |
104 | Eg: remove_attr and removeAttr works | |
105 | Fix #57 | |
106 | ||
107 | - .text() now returns an empty string instead of None if there is no text node. | |
108 | Fix #45 | |
109 | ||
110 | - Fixed #23: removeClass adds class attribute to elements which previously | |
111 | lacked one | |
112 | ||
113 | ||
114 | 1.2.6 (2013-10-11) | |
115 | ------------------ | |
116 | ||
117 | README_fixt.py was not include in the release. Fix #54. | |
118 | ||
119 | ||
120 | 1.2.5 (2013-10-10) | |
121 | ------------------ | |
122 | ||
123 | cssselect compat. See https://github.com/SimonSapin/cssselect/pull/22 | |
124 | ||
125 | tests improvments. no longer require a eth connection. | |
126 | ||
127 | fix #55 | |
128 | ||
129 | 1.2.4 | |
130 | ----- | |
131 | ||
132 | Moved to github. So a few files are renamed from .txt to .rst | |
133 | ||
134 | Added .xhtml_to_html() and .remove_namespaces() | |
135 | ||
136 | Use requests to fetch urls (if available) | |
137 | ||
138 | Use restkit's proxy instead of Paste (which will die with py3) | |
139 | ||
140 | Allow to open https urls | |
141 | ||
142 | python2.5 is no longer supported (may work, but tests are broken) | |
143 | ||
144 | 1.2.3 | |
145 | ----- | |
146 | ||
147 | Allow to pass this in .filter() callback | |
148 | ||
149 | Add .contents() .items() | |
150 | ||
151 | Add tox.ini | |
152 | ||
153 | Bug fixes: fix #35 #55 #64 #66 | |
154 | ||
155 | 1.2.2 | |
156 | ----- | |
157 | ||
158 | Fix cssselectpatch to match the newer implementation of cssselect. Fixes issue #62, #52 and #59 (Haoyu Bai) | |
159 | ||
160 | Fix issue #37 (Caleb Burns) | |
161 | ||
162 | 1.2.1 | |
163 | ----- | |
164 | ||
165 | Allow to use a custom css translator. | |
166 | ||
167 | Fix issue 44: case problem with xml documents | |
168 | ||
169 | 1.2 | |
170 | --- | |
171 | ||
172 | PyQuery now use `cssselect <http://pypi.python.org/pypi/cssselect>`_. See issue | |
173 | 43. | |
174 | ||
175 | Fix issue 40: forward .html() extra arguments to ``lxml.etree.tostring`` | |
176 | ||
177 | 1.1.1 | |
178 | ----- | |
179 | ||
180 | Minor release. Include test file so you can run tests from the tarball. | |
181 | ||
182 | ||
183 | 1.1 | |
184 | --- | |
185 | ||
186 | fix issues 30, 31, 32 - py3 improvements / webob 1.2+ support | |
187 | ||
188 | ||
189 | 1.0 | |
190 | --- | |
191 | ||
192 | fix issues 24 | |
193 | ||
194 | 0.7 | |
195 | --- | |
196 | ||
197 | Python 3 compatible | |
198 | ||
199 | Add __unicode__ method | |
200 | ||
201 | Add root and encoding attribute | |
202 | ||
203 | fix issues 19, 20, 22, 23 | |
204 | ||
205 | 0.6.1 | |
206 | ------ | |
207 | ||
208 | Move README.txt at package root | |
209 | ||
210 | Add CHANGES.txt and add it to long_description | |
211 | ||
212 | 0.6 | |
213 | ---- | |
214 | ||
215 | Added PyQuery.outerHtml | |
216 | ||
217 | Added PyQuery.fn | |
218 | ||
219 | Added PyQuery.map | |
220 | ||
221 | Change PyQuery.each behavior to reflect jQuery api | |
222 | ||
223 | ||
224 | ||
225 | ||
226 | ||
227 | 10 | Keywords: jquery html xml scraping |
228 | 11 | Platform: UNKNOWN |
229 | 12 | Classifier: Intended Audience :: Developers |
230 | 13 | Classifier: Development Status :: 5 - Production/Stable |
231 | Classifier: Programming Language :: Python :: 2 | |
232 | Classifier: Programming Language :: Python :: 2.6 | |
233 | Classifier: Programming Language :: Python :: 2.7 | |
234 | 14 | Classifier: Programming Language :: Python :: 3 |
235 | Classifier: Programming Language :: Python :: 3.3 | |
236 | Classifier: Programming Language :: Python :: 3.4 | |
15 | Classifier: Programming Language :: Python :: 3.5 | |
16 | Classifier: Programming Language :: Python :: 3.6 | |
17 | Classifier: Programming Language :: Python :: 3.7 | |
18 | Provides-Extra: test | |
19 | License-File: LICENSE.txt | |
20 | ||
21 | ||
22 | pyquery: a jquery-like library for python | |
23 | ========================================= | |
24 | ||
25 | .. image:: https://travis-ci.org/gawel/pyquery.svg | |
26 | :alt: Build Status | |
27 | :target: https://travis-ci.org/gawel/pyquery | |
28 | ||
29 | pyquery allows you to make jquery queries on xml documents. | |
30 | The API is as much as possible similar to jquery. pyquery uses lxml for fast | |
31 | xml and html manipulation. | |
32 | ||
33 | This is not (or at least not yet) a library to produce or interact with | |
34 | javascript code. I just liked the jquery API and I missed it in python so I | |
35 | told myself "Hey let's make jquery in python". This is the result. | |
36 | ||
37 | The `project`_ is being actively developed on a git repository on Github. I | |
38 | have the policy of giving push access to anyone who wants it and then reviewing | |
39 | what they do. So if you want to contribute just email me. | |
40 | ||
41 | Please report bugs on the `github | |
42 | <https://github.com/gawel/pyquery/issues>`_ issue | |
43 | tracker. | |
44 | ||
45 | .. _deliverance: http://www.gawel.org/weblog/en/2008/12/skinning-with-pyquery-and-deliverance | |
46 | .. _project: https://github.com/gawel/pyquery/ | |
47 | ||
48 | I've spent hours maintaining this software, with love. | |
49 | Please consider tipping if you like it: | |
50 | ||
51 | BTC: 1PruQAwByDndFZ7vTeJhyWefAghaZx9RZg | |
52 | ||
53 | ETH: 0xb6418036d8E06c60C4D91c17d72Df6e1e5b15CE6 | |
54 | ||
55 | LTC: LY6CdZcDbxnBX9GFBJ45TqVj8NykBBqsmT | |
56 | ||
57 | .. | |
58 | >>> (urlopen, your_url, path_to_html_file) = getfixture('readme_fixt') | |
59 | ||
60 | Quickstart | |
61 | ========== | |
62 | ||
63 | You can use the PyQuery class to load an xml document from a string, a lxml | |
64 | document, from a file or from an url:: | |
65 | ||
66 | >>> from pyquery import PyQuery as pq | |
67 | >>> from lxml import etree | |
68 | >>> import urllib | |
69 | >>> d = pq("<html></html>") | |
70 | >>> d = pq(etree.fromstring("<html></html>")) | |
71 | >>> d = pq(url=your_url) | |
72 | >>> d = pq(url=your_url, | |
73 | ... opener=lambda url, **kw: urlopen(url).read()) | |
74 | >>> d = pq(filename=path_to_html_file) | |
75 | ||
76 | Now d is like the $ in jquery:: | |
77 | ||
78 | >>> d("#hello") | |
79 | [<p#hello.hello>] | |
80 | >>> p = d("#hello") | |
81 | >>> print(p.html()) | |
82 | Hello world ! | |
83 | >>> p.html("you know <a href='http://python.org/'>Python</a> rocks") | |
84 | [<p#hello.hello>] | |
85 | >>> print(p.html()) | |
86 | you know <a href="http://python.org/">Python</a> rocks | |
87 | >>> print(p.text()) | |
88 | you know Python rocks | |
89 | ||
90 | You can use some of the pseudo classes that are available in jQuery but that | |
91 | are not standard in css such as :first :last :even :odd :eq :lt :gt :checked | |
92 | :selected :file:: | |
93 | ||
94 | >>> d('p:first') | |
95 | [<p#hello.hello>] | |
96 | ||
97 | ||
98 | ||
99 | See http://pyquery.rtfd.org/ for the full documentation | |
100 | ||
101 | News | |
102 | ==== | |
103 | ||
104 | 2.0.0 (unreleased) | |
105 | ------------------ | |
106 | ||
107 | - Breaking change: inputs starting with ``"http://"`` or ``"https://"`` like | |
108 | ``PyQuery("http://example.com")`` will no longer fetch the contents of the URL. | |
109 | Users desiring the old behavior should switch to ``PyQuery(url="http://example.com")``. | |
110 | ||
111 | - Add nextUntil method | |
112 | ||
113 | - ``.remove()`` no longer inserts a space in place of the removed element | |
114 | ||
115 | - Fix escaping of top-level element text in ``.html()`` output | |
116 | ||
117 | ||
118 | 1.4.3 (2020-11-21) | |
119 | ------------------ | |
120 | ||
121 | - No longer use a universal wheel | |
122 | ||
123 | ||
124 | 1.4.2 (2020-11-21) | |
125 | ------------------ | |
126 | ||
127 | - Fix exception raised when calling `PyQuery("<textarea></textarea>").text()` | |
128 | ||
129 | - python2 is no longer supported | |
130 | ||
131 | 1.4.1 (2019-10-26) | |
132 | ------------------ | |
133 | ||
134 | - This is the latest release with py2 support | |
135 | ||
136 | - Remove py33, py34 support | |
137 | ||
138 | - web scraping improvements: default timeout and session support | |
139 | ||
140 | - Add API methods to serialize form-related elements according to spec | |
141 | ||
142 | - Include HTML markup when querying textarea text/value | |
143 | ||
144 | ||
145 | 1.4.0 (2018-01-11) | |
146 | ------------------ | |
147 | ||
148 | - Refactoring of `.text()` to match firefox behavior. | |
149 | ||
150 | ||
151 | 1.3.0 (2017-10-21) | |
152 | ------------------ | |
153 | ||
154 | - Remove some unmaintained modules: ``pyquery.ajax`` and ``pyquery.rules`` | |
155 | ||
156 | - Code cleanup. No longer use ugly hacks required by python2.6/python3.2. | |
157 | ||
158 | - Run tests with python3.6 on CI | |
159 | ||
160 | - Add a ``method`` argument to ``.outer_html()`` | |
161 | ||
162 | ||
163 | 1.2.17 (2016-10-14) | |
164 | ------------------- | |
165 | ||
166 | - ``PyQuery('<input value="">').val()`` is ``''`` | |
167 | - ``PyQuery('<input>').val()`` is ``''`` | |
168 | ||
169 | ||
170 | 1.2.16 (2016-10-14) | |
171 | ------------------- | |
172 | ||
173 | - ``.attr('value', '')`` no longer removes the ``value`` attribute | |
174 | ||
175 | - ``<input type="checkbox">`` without ``value="..."`` have a ``.val()`` of | |
176 | ``'on'`` | |
177 | ||
178 | - ``<input type="radio">`` without ``value="..."`` have a ``.val()`` of | |
179 | ``'on'`` | |
180 | ||
181 | - ``<select>`` without ``<option selected>`` have the value of their first | |
182 | ``<option>`` (or ``None`` if there are no options) | |
183 | ||
184 | ||
185 | 1.2.15 (2016-10-11) | |
186 | ------------------- | |
187 | ||
188 | - .val() should never raise | |
189 | ||
190 | - drop py26 support | |
191 | ||
192 | - improve .extend() by returning self | |
193 | ||
194 | ||
195 | 1.2.14 (2016-10-10) | |
196 | ------------------- | |
197 | ||
198 | - fix val() for <textarea> and <select>, to match jQuery behavior | |
199 | ||
200 | ||
201 | 1.2.13 (2016-04-12) | |
202 | ------------------- | |
203 | ||
204 | - Note explicit support for Python 3.5 | |
205 | ||
206 | 1.2.12 (2016-04-12) | |
207 | ------------------- | |
208 | ||
209 | - make_links_absolute now take care of whitespaces | |
210 | ||
211 | - added pseudo selector :has() | |
212 | ||
213 | - add cookies arguments as allowed arguments for requests | |
214 | ||
215 | ||
216 | 1.2.11 (2016-02-02) | |
217 | ------------------- | |
218 | ||
219 | - Preserve namespaces attribute on PyQuery copies. | |
220 | ||
221 | - Do not raise an error when the http response code is 2XX | |
222 | ||
223 | 1.2.10 (2016-01-05) | |
224 | ------------------- | |
225 | ||
226 | - Fixed #118: implemented usage ``lxml.etree.tostring`` within ``outer_html`` method | |
227 | ||
228 | - Fixed #117: Raise HTTP Error if HTTP status code is not equal to 200 | |
229 | ||
230 | - Fixed #112: make_links_absolute does not apply to form actions | |
231 | ||
232 | - Fixed #98: contains act like jQuery | |
233 | ||
234 | ||
235 | 1.2.9 (2014-08-22) | |
236 | ------------------ | |
237 | ||
238 | - Support for keyword arguments in PyQuery custom functions | |
239 | ||
240 | - Fixed #78: items must take care or the parent | |
241 | ||
242 | - Fixed #65 PyQuery.make_links_absolute() no longer creates 'href' attribute | |
243 | when it isn't there | |
244 | ||
245 | - Fixed #19. ``is_()`` was broken. | |
246 | ||
247 | - Fixed #9. ``.replaceWith(PyQuery element)`` raises error | |
248 | ||
249 | - Remove official python3.2 support (mostly because of 3rd party semi-deps) | |
250 | ||
251 | ||
252 | 1.2.8 (2013-12-21) | |
253 | ------------------ | |
254 | ||
255 | - Fixed #22: Open by filename fails when file contains invalid xml | |
256 | ||
257 | - Bug fix in .remove_class() | |
258 | ||
259 | ||
260 | 1.2.7 (2013-12-21) | |
261 | ------------------ | |
262 | ||
263 | - Use pep8 name for methods but keep an alias for camel case method. | |
264 | Eg: remove_attr and removeAttr works | |
265 | Fix #57 | |
266 | ||
267 | - .text() now returns an empty string instead of None if there is no text node. | |
268 | Fix #45 | |
269 | ||
270 | - Fixed #23: removeClass adds class attribute to elements which previously | |
271 | lacked one | |
272 | ||
273 | ||
274 | 1.2.6 (2013-10-11) | |
275 | ------------------ | |
276 | ||
277 | - README_fixt.py was not included in the release. Fix #54. | |
278 | ||
279 | ||
280 | 1.2.5 (2013-10-10) | |
281 | ------------------ | |
282 | ||
283 | - cssselect compat. See https://github.com/SimonSapin/cssselect/pull/22 | |
284 | ||
285 | - tests improvements. no longer require a network connection. | |
286 | ||
287 | - fix #55 | |
288 | ||
289 | 1.2.4 | |
290 | ----- | |
291 | ||
292 | - Moved to github. So a few files are renamed from .txt to .rst | |
293 | ||
294 | - Added .xhtml_to_html() and .remove_namespaces() | |
295 | ||
296 | - Use requests to fetch urls (if available) | |
297 | ||
298 | - Use restkit's proxy instead of Paste (which will die with py3) | |
299 | ||
300 | - Allow to open https urls | |
301 | ||
302 | - python2.5 is no longer supported (may work, but tests are broken) | |
303 | ||
304 | 1.2.3 | |
305 | ----- | |
306 | ||
307 | - Allow to pass this in .filter() callback | |
308 | ||
309 | - Add .contents() .items() | |
310 | ||
311 | - Add tox.ini | |
312 | ||
313 | - Bug fixes: fix #35 #55 #64 #66 | |
314 | ||
315 | 1.2.2 | |
316 | ----- | |
317 | ||
318 | - Fix cssselectpatch to match the newer implementation of cssselect. Fixes issue #62, #52 and #59 (Haoyu Bai) | |
319 | ||
320 | - Fix issue #37 (Caleb Burns) | |
321 | ||
322 | 1.2.1 | |
323 | ----- | |
324 | ||
325 | - Allow to use a custom css translator. | |
326 | ||
327 | - Fix issue 44: case problem with xml documents | |
328 | ||
329 | 1.2 | |
330 | --- | |
331 | ||
332 | - PyQuery now uses `cssselect <http://pypi.python.org/pypi/cssselect>`_. See issue 43. | |
333 | ||
334 | - Fix issue 40: forward .html() extra arguments to ``lxml.etree.tostring`` | |
335 | ||
336 | 1.1.1 | |
337 | ----- | |
338 | ||
339 | - Minor release. Include test file so you can run tests from the tarball. | |
340 | ||
341 | ||
342 | 1.1 | |
343 | --- | |
344 | ||
345 | - fix issues 30, 31, 32 - py3 improvements / webob 1.2+ support | |
346 | ||
347 | ||
348 | 1.0 | |
349 | --- | |
350 | ||
351 | - fix issues 24 | |
352 | ||
353 | 0.7 | |
354 | --- | |
355 | ||
356 | - Python 3 compatible | |
357 | ||
358 | - Add __unicode__ method | |
359 | ||
360 | - Add root and encoding attribute | |
361 | ||
362 | - fix issues 19, 20, 22, 23 | |
363 | ||
364 | 0.6.1 | |
365 | ------ | |
366 | ||
367 | - Move README.txt at package root | |
368 | ||
369 | - Add CHANGES.txt and add it to long_description | |
370 | ||
371 | 0.6 | |
372 | ---- | |
373 | ||
374 | - Added PyQuery.outerHtml | |
375 | ||
376 | - Added PyQuery.fn | |
377 | ||
378 | - Added PyQuery.map | |
379 | ||
380 | - Change PyQuery.each behavior to reflect jQuery api | |
381 | ||
382 | ||
383 | ||
384 | ||
385 | ||
386 |
0 | 0 | pyquery: a jquery-like library for python |
1 | 1 | ========================================= |
2 | 2 | |
3 | .. image:: https://travis-ci.org/gawel/pyquery.svg | |
4 | :alt: Build Status | |
5 | :target: https://travis-ci.org/gawel/pyquery | |
6 | ||
3 | 7 | pyquery allows you to make jquery queries on xml documents. |
4 | The API is as much as possible the similar to jquery. pyquery uses lxml for fast | |
8 | The API is as much as possible similar to jquery. pyquery uses lxml for fast | |
5 | 9 | xml and html manipulation. |
6 | 10 | |
7 | 11 | This is not (or at least not yet) a library to produce or interact with |
8 | 12 | javascript code. I just liked the jquery API and I missed it in python so I |
9 | 13 | told myself "Hey let's make jquery in python". This is the result. |
10 | 14 | |
11 | The `project`_ is being actively developped on a git repository on Github. I | |
12 | have the policy of giving push access to anyone who wants it and then to review | |
13 | what he does. So if you want to contribute just email me. | |
15 | The `project`_ is being actively developed on a git repository on Github. I | |
16 | have the policy of giving push access to anyone who wants it and then reviewing | |
17 | what they do. So if you want to contribute just email me. | |
14 | 18 | |
15 | 19 | Please report bugs on the `github |
16 | 20 | <https://github.com/gawel/pyquery/issues>`_ issue |
18 | 22 | |
19 | 23 | .. _deliverance: http://www.gawel.org/weblog/en/2008/12/skinning-with-pyquery-and-deliverance |
20 | 24 | .. _project: https://github.com/gawel/pyquery/ |
25 | ||
26 | I've spent hours maintaining this software, with love. | |
27 | Please consider tipping if you like it: | |
28 | ||
29 | BTC: 1PruQAwByDndFZ7vTeJhyWefAghaZx9RZg | |
30 | ||
31 | ETH: 0xb6418036d8E06c60C4D91c17d72Df6e1e5b15CE6 | |
32 | ||
33 | LTC: LY6CdZcDbxnBX9GFBJ45TqVj8NykBBqsmT | |
34 | ||
35 | .. | |
36 | >>> (urlopen, your_url, path_to_html_file) = getfixture('readme_fixt') | |
21 | 37 | |
22 | 38 | Quickstart |
23 | 39 | ========== |
0 | [buildout] | |
1 | newest = false | |
2 | parts = py2 docs | |
3 | develop = . | |
4 | ||
5 | [py3] | |
6 | recipe = zc.recipe.egg | |
7 | eggs = | |
8 | cssselect>0.7.9 | |
9 | WebOb>1.1.9 | |
10 | WebTest | |
11 | pyquery | |
12 | nose | |
13 | coverage | |
14 | ||
15 | [py2] | |
16 | recipe = zc.recipe.egg | |
17 | eggs = | |
18 | ${py3:eggs} | |
19 | unittest2 | |
20 | BeautifulSoup | |
21 | restkit | |
22 | ||
23 | ||
24 | [docs] | |
25 | recipe = zc.recipe.egg | |
26 | eggs = | |
27 | ${py2:eggs} | |
28 | Pygments | |
29 | Sphinx | |
30 | sphinx-pypi-upload | |
31 | interpreter = py | |
32 | scripts = | |
33 | sphinx-build | |
34 | ||
35 | [tox] | |
36 | recipe = gp.recipe.tox |
0 | import os | |
1 | import pytest | |
2 | from webtest import http | |
3 | from webtest.debugapp import debug_app | |
4 | from urllib.request import urlopen | |
5 | ||
6 | ||
7 | @pytest.fixture | |
8 | def readme_fixt(): | |
9 | server = http.StopableWSGIServer.create(debug_app) | |
10 | server.wait() | |
11 | path_to_html_file = os.path.join('tests', 'test.html') | |
12 | yield ( | |
13 | urlopen, | |
14 | server.application_url, | |
15 | path_to_html_file, | |
16 | ) | |
17 | server.shutdown() |
0 | ============================================= | |
1 | :mod:`pyquery.ajax` -- PyQuery AJAX extension | |
2 | ============================================= | |
3 | ||
4 | .. automodule:: pyquery.ajax | |
5 | ||
6 | ||
7 | .. fake imports | |
8 | ||
9 | >>> from pyquery.ajax import PyQuery as pq | |
10 | ||
11 | You can query some wsgi app if `WebOb`_ is installed (it's not a pyquery | |
12 | dependency). In this example the test app returns a simple input at `/` and a | 
13 | submit button at `/submit`:: | |
14 | ||
15 | >>> d = pq('<form></form>', app=input_app) | |
16 | >>> d.append(d.get('/')) | |
17 | [<form>] | |
18 | >>> print(d) | |
19 | <form><input name="youyou" type="text" value=""/></form> | |
20 | ||
21 | The app is also available in new nodes:: | |
22 | ||
23 | >>> d.get('/').app is d.app is d('form').app | |
24 | True | |
25 | ||
26 | You can also request another path:: | |
27 | ||
28 | >>> d.append(d.get('/submit')) | |
29 | [<form>] | |
30 | >>> print(d) | |
31 | <form><input name="youyou" type="text" value=""/><input type="submit" value="OK"/></form> | |
32 | ||
33 | If `restkit`_ is installed, you are able to get url directly with a `HostProxy`_ app:: | |
34 | ||
35 | >>> a = d.get(your_url) | |
36 | >>> a | |
37 | [<html>] | |
38 | ||
39 | You can retrieve the app response:: | |
40 | ||
41 | >>> print(a.response.status) | |
42 | 200 OK | |
43 | ||
44 | The response attribute is a `WebOb`_ `Response`_ | |
45 | ||
46 | .. _webob: http://pythonpaste.org/webob/ | |
47 | .. _response: http://pythonpaste.org/webob/#response | |
48 | .. _restkit: http://benoitc.github.com/restkit/ | |
49 | .. _hostproxy: http://benoitc.github.com/restkit/wsgi_proxy.html | |
50 | ||
51 | Api | |
52 | --- | |
53 | ||
54 | .. autoclass:: PyQuery | |
55 | :members: | |
56 |
0 | # -*- coding: utf-8 -*- | |
1 | import os | |
2 | import sys | |
3 | sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) | |
4 | from webtest import http | |
5 | from doctest import SKIP | |
6 | from tests.apps import input_app | |
7 | ||
8 | PY3 = sys.version_info >= (3,) | |
9 | ||
10 | ||
11 | def setup_test(test): | |
12 | for example in test.examples: | |
13 | # urlopen as moved in py3 | |
14 | if PY3: | |
15 | example.options.setdefault(SKIP, 1) | |
16 | if not PY3: | |
17 | server = http.StopableWSGIServer.create(input_app) | |
18 | server.wait() | |
19 | path_to_html_file = os.path.join('tests', 'test.html') | |
20 | test.globs.update( | |
21 | input_app=input_app, | |
22 | server=server, | |
23 | your_url=server.application_url.rstrip('/') + '/html', | |
24 | path_to_html_file=path_to_html_file, | |
25 | ) | |
26 | setup_test.__test__ = False | |
27 | ||
28 | ||
29 | def teardown_test(test): | |
30 | if 'server' in test.globs: | |
31 | test.globs['server'].shutdown() | |
32 | teardown_test.__test__ = False |
2 | 2 | |
3 | 3 | .. |
4 | 4 | >>> from pyquery import PyQuery as pq |
5 | ||
6 | Using attribute to select specific tag | |
7 | In attribute selectors, the value should be a valid CSS identifier or quoted as string:: | |
8 | ||
9 | >>> d = pq("<option value='1'><option value='2'>") | |
10 | >>> d('option[value="1"]') | |
11 | [<option>] | |
12 | ||
5 | 13 | |
6 | 14 | You can play with the attributes with the jquery API:: |
7 | 15 |
40 | 40 | |
41 | 41 | # General information about the project. |
42 | 42 | project = u'pyquery' |
43 | copyright = u'2012, Olivier Lauzanne' | |
43 | copyright = u'2012-2017, Olivier Lauzanne' | |
44 | 44 | |
45 | 45 | # The version info for the project you're documenting, acts as replacement for |
46 | 46 | # |version| and |release|, also used in various other places throughout the |
47 | 47 | # built documents. |
48 | 48 | # |
49 | 49 | # The short X.Y version. |
50 | version = '0.3' | |
50 | version = '1.3.x' | |
51 | 51 | # The full version, including alpha/beta/rc tags. |
52 | release = '0.3' | |
52 | release = '1.3.x' | |
53 | 53 | |
54 | 54 | # The language for content autogenerated by Sphinx. Refer to documentation |
55 | 55 | # for a list of supported languages. |
253 | 253 | break |
254 | 254 | del pkg_dir, setup, path |
255 | 255 | |
256 | from pyquery.cssselectpatch import JQueryTranslator | |
257 | ||
258 | with open('pseudo_classes.rst', 'w') as fd: | |
259 | fd.write('=========================\n') | |
260 | fd.write('Using pseudo classes\n') | |
261 | fd.write('=========================\n') | |
262 | for k in sorted(dir(JQueryTranslator)): | |
263 | if k.startswith('xpath_'): | |
264 | attr = getattr(JQueryTranslator, k) | |
265 | doc = getattr(attr, '__doc__', '') or '' | |
266 | doc = doc.strip() | |
267 | if doc.startswith('Common implementation'): | |
268 | continue | |
269 | k = k[6:] | |
270 | if '_' not in k or not doc: | |
271 | continue | |
272 | k, t = k.split('_', 1) | |
273 | if '_' in t: | |
274 | continue | |
275 | if t == 'function': | |
276 | k += '()' | |
277 | fd.write('\n\n:%s\n' % k) | |
278 | fd.write('==================\n\n') | |
279 | fd.write(doc.strip('..').replace(' ', ' ')) | |
256 | try: | |
257 | from pyquery.cssselectpatch import JQueryTranslator | |
258 | except ImportError: | |
259 | pass | |
260 | else: | |
261 | with open('pseudo_classes.rst', 'w') as fd: | |
262 | fd.write('=========================\n') | |
263 | fd.write('Using pseudo classes\n') | |
264 | fd.write('=========================\n') | |
265 | for k in sorted(dir(JQueryTranslator)): | |
266 | if k.startswith('xpath_'): | |
267 | attr = getattr(JQueryTranslator, k) | |
268 | doc = getattr(attr, '__doc__', '') or '' | |
269 | doc = doc.strip() | |
270 | if doc.startswith('Common implementation'): | |
271 | continue | |
272 | k = k[6:] | |
273 | if '_' not in k or not doc: | |
274 | continue | |
275 | k, t = k.split('_', 1) | |
276 | if '_' in t: | |
277 | continue | |
278 | if t == 'function': | |
279 | k += '()' | |
280 | fd.write('\n\n:%s\n' % k) | |
281 | fd.write('==================\n\n') | |
282 | fd.write(doc.strip('..').replace(' ', ' ')) |
0 | import os | |
1 | import sys | |
2 | import pytest | |
3 | from webtest import http | |
4 | from webtest.debugapp import debug_app | |
5 | ||
6 | ||
7 | @pytest.fixture | |
8 | def scrap_url(): | |
9 | sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) | |
10 | from tests.apps import input_app | |
11 | server = http.StopableWSGIServer.create(input_app) | |
12 | server.wait() | |
13 | yield server.application_url.rstrip('/') + '/html' | |
14 | server.shutdown() | |
15 | ||
16 | ||
17 | @pytest.fixture | |
18 | def tips_url(): | |
19 | server = http.StopableWSGIServer.create(debug_app) | |
20 | server.wait() | |
21 | yield server.application_url.rstrip('/') + '/form.html' | |
22 | server.shutdown() |
70 | 70 | >>> print(pq('<div>Yeah !</div>').addClass('myclass') + pq('<b>cool</b>')) |
71 | 71 | <div class="myclass">Yeah !</div><b>cool</b> |
72 | 72 | |
73 | Remove all namespaces:: | |
73 | 74 | |
75 | >>> d = pq('<foo xmlns="http://example.com/foo"></foo>') | |
76 | >>> d | |
77 | [<{http://example.com/foo}foo>] | |
78 | >>> d.remove_namespaces() | |
79 | [<foo>] | |
80 |
51 | 51 | |
52 | 52 | >>> from pyquery import PyQuery |
53 | 53 | >>> d = PyQuery('<div><h1/><h1 class="title">title</h1></div>') |
54 | >>> d(':contains("title")') | |
54 | >>> d('h1:contains("title")') | |
55 | 55 | [<h1.title>] |
56 | 56 | |
57 | 57 | |
81 | 81 | >>> from pyquery import PyQuery |
82 | 82 | >>> d = PyQuery('<div><h1><span>title</span></h1><h2/></div>') |
83 | 83 | >>> d(':empty') |
84 | [<span>, <h2>] | |
84 | [<h2>] | |
85 | 85 | |
86 | 86 | |
87 | 87 | |
159 | 159 | |
160 | 160 | |
161 | 161 | |
162 | :has() | |
163 | ================== | |
164 | ||
165 | Matches elements which contain at least one element that matches | |
166 | the specified selector. https://api.jquery.com/has-selector/ | |
167 | ||
168 | >>> from pyquery import PyQuery | |
169 | >>> d = PyQuery('<div class="foo"><div class="bar"></div></div>') | |
170 | >>> d('.foo:has(".baz")') | |
171 | [] | |
172 | >>> d('.foo:has(".foo")') | |
173 | [] | |
174 | >>> d('.foo:has(".bar")') | |
175 | [<div.foo>] | |
176 | >>> d('.foo:has(div)') | |
177 | [<div.foo>] | |
178 | ||
179 | ||
180 | ||
162 | 181 | :header |
163 | 182 | ================== |
164 | 183 | |
165 | Matches all header elelements (h1, ..., h6):: | |
184 | Matches all header elements (h1, ..., h6):: | |
166 | 185 | |
167 | 186 | >>> from pyquery import PyQuery |
168 | 187 | >>> d = PyQuery('<div><h1>title</h1></div>') |
268 | 287 | |
269 | 288 | |
270 | 289 | |
290 | :pseudo | |
291 | ================== | |
292 | ||
293 | Translate a pseudo-element. | |
294 | ||
295 | Defaults to not supporting pseudo-elements at all, | |
296 | but can be overridden by sub-classes | |
297 | ||
271 | 298 | :radio |
272 | 299 | ================== |
273 | 300 |
1 | 1 | ========= |
2 | 2 | |
3 | 3 | .. |
4 | >>> from pyquery.ajax import PyQuery as pq | |
4 | >>> from pyquery import PyQuery as pq | |
5 | >>> your_url = getfixture('scrap_url') | |
5 | 6 | |
6 | 7 | PyQuery is able to load an html document from a url:: |
7 | 8 | |
8 | >>> pq(your_url) | |
9 | >>> pq(url=your_url) | |
9 | 10 | [<html>] |
10 | 11 | |
11 | 12 | By default it uses python's urllib. |
12 | 13 | |
13 | 14 | If `requests`_ is installed then it will use it. This allows you to use most of `requests`_ parameters::
14 | 15 | |
15 | >>> pq(your_url, headers={'user-agent': 'pyquery'}) | |
16 | >>> pq(url=your_url, headers={'user-agent': 'pyquery'}) | |
16 | 17 | [<html>] |
17 | 18 | |
18 | >>> pq(your_url, {'q': 'foo'}, method='post', verify=True) | |
19 | >>> pq(url=your_url, data={'q': 'foo'}, method='post', verify=True) | |
19 | 20 | [<html>] |
20 | 21 | |
22 | ||
23 | Timeout | |
24 | ------- | |
25 | ||
26 | The default timeout is 60 seconds; you can change it by setting the timeout parameter, which is forwarded to the underlying urllib or requests library. | 
27 | ||
28 | Session | |
29 | ------- | |
30 | ||
31 | When using the requests library you can instantiate a Session object which keeps state between http calls (for example - to keep cookies). You can set the session parameter to use this session object. | |
32 | ||
21 | 33 | .. _requests: http://docs.python-requests.org/en/latest/ |
0 | # -*- coding: utf-8 -*- | |
1 | import os | |
2 | import sys | |
3 | sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) | |
4 | from webtest import http | |
5 | from tests.apps import input_app | |
6 | ||
7 | ||
8 | def setup_test(test): | |
9 | server = http.StopableWSGIServer.create(input_app) | |
10 | server.wait() | |
11 | test.globs.update( | |
12 | server=server, | |
13 | your_url=server.application_url.rstrip('/') + '/html', | |
14 | ) | |
15 | setup_test.__test__ = False | |
16 | ||
17 | ||
18 | def teardown_test(test): | |
19 | test.globs['server'].shutdown() | |
20 | teardown_test.__test__ = False |
2 | 2 | |
3 | 3 | .. |
4 | 4 | >>> from pyquery import PyQuery as pq |
5 | >>> your_url = getfixture('tips_url') | |
5 | 6 | |
6 | 7 | Making links absolute |
7 | 8 | --------------------- |
8 | 9 | |
9 | You can make links absolute which can be usefull for screen scrapping:: | |
10 | You can make links absolute which can be useful for screen scraping:: | 
10 | 11 | |
11 | 12 | >>> d = pq(url=your_url, parser='html') |
12 | 13 | >>> d('form').attr('action') |
0 | # -*- coding: utf-8 -*- | |
1 | import os | |
2 | from webtest import http | |
3 | from webtest.debugapp import debug_app | |
4 | ||
5 | ||
6 | def setup_test(test): | |
7 | server = http.StopableWSGIServer.create(debug_app) | |
8 | server.wait() | |
9 | path_to_html_file = os.path.join('tests', 'test.html') | |
10 | test.globs.update( | |
11 | server=server, | |
12 | your_url=server.application_url.rstrip('/') + '/form.html', | |
13 | path_to_html_file=path_to_html_file, | |
14 | ) | |
15 | setup_test.__test__ = False | |
16 | ||
17 | ||
18 | def teardown_test(test): | |
19 | test.globs['server'].shutdown() | |
20 | teardown_test.__test__ = False |
33 | 33 | [<p#hello.hello>, <p#test>] |
34 | 34 | |
35 | 35 | |
36 | If you want to select a dotted id you need to escape the dot:: | |
37 | ||
38 | >>> d = pq('<p id="hello.you"><a/></p><p id="test"><a/></p>') | |
39 | >>> d(r'#hello\.you') | |
40 | [<p#hello.you>] | |
41 |
0 | #-*- coding:utf-8 -*- | |
1 | # | |
2 | 0 | # Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com> |
3 | 1 | # |
4 | 2 | # Distributed under the BSD license, see LICENSE.txt |
5 | 3 | |
6 | try: | |
7 | import webob | |
8 | import restkit | |
9 | except ImportError: | |
10 | from .pyquery import PyQuery | |
11 | else: | |
12 | from .ajax import PyQuery | |
13 | ||
4 | from .pyquery import PyQuery # NOQA |
0 | # -*- coding: utf-8 -*- | |
1 | from .pyquery import PyQuery as Base | |
2 | from .pyquery import no_default | |
3 | ||
4 | from webob import Request | |
5 | from webob import Response | |
6 | ||
7 | try: | |
8 | from restkit.contrib.wsgi_proxy import HostProxy | |
9 | except ImportError: | |
10 | HostProxy = no_default # NOQA | |
11 | ||
12 | ||
13 | class PyQuery(Base): | |
14 | ||
15 | def __init__(self, *args, **kwargs): | |
16 | if 'response' in kwargs: | |
17 | self.response = kwargs.pop('response') | |
18 | else: | |
19 | self.response = Response() | |
20 | if 'app' in kwargs: | |
21 | self.app = kwargs.pop('app') | |
22 | if len(args) == 0: | |
23 | args = [[]] | |
24 | else: | |
25 | self.app = no_default | |
26 | Base.__init__(self, *args, **kwargs) | |
27 | if self._parent is not no_default: | |
28 | self.app = self._parent.app | |
29 | ||
30 | def _wsgi_get(self, path_info, **kwargs): | |
31 | if path_info.startswith('/'): | |
32 | if 'app' in kwargs: | |
33 | app = kwargs.pop('app') | |
34 | elif self.app is not no_default: | |
35 | app = self.app | |
36 | else: | |
37 | raise ValueError('There is no app available') | |
38 | else: | |
39 | if HostProxy is not no_default: | |
40 | app = HostProxy(path_info) | |
41 | path_info = '/' | |
42 | else: | |
43 | raise ImportError('restkit is not installed') | |
44 | ||
45 | environ = kwargs.pop('environ').copy() | |
46 | environ.update(kwargs) | |
47 | ||
48 | # unsuported (came from Deliverance) | |
49 | for key in ['HTTP_ACCEPT_ENCODING', 'HTTP_IF_MATCH', | |
50 | 'HTTP_IF_UNMODIFIED_SINCE', 'HTTP_RANGE', 'HTTP_IF_RANGE']: | |
51 | if key in environ: | |
52 | del environ[key] | |
53 | ||
54 | req = Request.blank(path_info) | |
55 | req.environ.update(environ) | |
56 | resp = req.get_response(app) | |
57 | status = resp.status.split() | |
58 | ctype = resp.content_type.split(';')[0] | |
59 | if status[0] not in '45' and ctype == 'text/html': | |
60 | body = resp.body | |
61 | else: | |
62 | body = [] | |
63 | result = self.__class__(body, | |
64 | parent=self._parent, | |
65 | app=self.app, # always return self.app | |
66 | response=resp) | |
67 | return result | |
68 | ||
69 | def get(self, path_info, **kwargs): | |
70 | """GET a path from wsgi app or url | |
71 | """ | |
72 | environ = kwargs.setdefault('environ', {}) | |
73 | environ['REQUEST_METHOD'] = 'GET' | |
74 | environ['CONTENT_LENGTH'] = '0' | |
75 | return self._wsgi_get(path_info, **kwargs) | |
76 | ||
77 | def post(self, path_info, **kwargs): | |
78 | """POST a path from wsgi app or url | |
79 | """ | |
80 | environ = kwargs.setdefault('environ', {}) | |
81 | environ['REQUEST_METHOD'] = 'POST' | |
82 | return self._wsgi_get(path_info, **kwargs) |
0 | #-*- coding:utf-8 -*- | |
1 | # | |
2 | 0 | # Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com> |
3 | 1 | # |
4 | 2 | # Distributed under the BSD license, see LICENSE.txt |
126 | 124 | xpath.add_condition("@selected and name(.) = 'option'") |
127 | 125 | return xpath |
128 | 126 | |
127 | def _format_disabled_xpath(self, disabled=True): | |
128 | """Format XPath condition for :disabled or :enabled pseudo-classes | |
129 | according to the WHATWG spec. See: https://html.spec.whatwg.org | |
130 | /multipage/semantics-other.html#concept-element-disabled | |
131 | """ | |
132 | bool_op = '' if disabled else 'not' | |
133 | return '''( | |
134 | ((name(.) = 'button' or name(.) = 'input' or name(.) = 'select' | |
135 | or name(.) = 'textarea' or name(.) = 'fieldset') | |
136 | and %s(@disabled or (ancestor::fieldset[@disabled] | |
137 | and not(ancestor::legend[not(preceding-sibling::legend)]))) | |
138 | ) | |
139 | or | |
140 | ((name(.) = 'option' | |
141 | and %s(@disabled or ancestor::optgroup[@disabled])) | |
142 | ) | |
143 | or | |
144 | ((name(.) = 'optgroup' and %s(@disabled))) | |
145 | )''' % (bool_op, bool_op, bool_op) | |
146 | ||
129 | 147 | def xpath_disabled_pseudo(self, xpath): |
130 | 148 | """Matches all elements that are disabled:: |
131 | 149 | |
136 | 154 | |
137 | 155 | .. |
138 | 156 | """ |
139 | xpath.add_condition("@disabled") | |
157 | xpath.add_condition(self._format_disabled_xpath()) | |
140 | 158 | return xpath |
141 | 159 | |
142 | 160 | def xpath_enabled_pseudo(self, xpath): |
149 | 167 | |
150 | 168 | .. |
151 | 169 | """ |
152 | xpath.add_condition("not(@disabled) and name(.) = 'input'") | |
170 | xpath.add_condition(self._format_disabled_xpath(disabled=False)) | |
153 | 171 | return xpath |
154 | 172 | |
155 | 173 | def xpath_file_pseudo(self, xpath): |
302 | 320 | return xpath |
303 | 321 | |
304 | 322 | def xpath_header_pseudo(self, xpath): |
305 | """Matches all header elelements (h1, ..., h6):: | |
323 | """Matches all header elements (h1, ..., h6):: | |
306 | 324 | |
307 | 325 | >>> from pyquery import PyQuery |
308 | 326 | >>> d = PyQuery('<div><h1>title</h1></div>') |
336 | 354 | >>> from pyquery import PyQuery |
337 | 355 | >>> d = PyQuery('<div><h1><span>title</span></h1><h2/></div>') |
338 | 356 | >>> d(':empty') |
339 | [<span>, <h2>] | |
340 | ||
341 | .. | |
342 | """ | |
343 | xpath.add_condition("count(child::*) = 0") | |
357 | [<h2>] | |
358 | ||
359 | .. | |
360 | """ | |
361 | xpath.add_condition("not(node())") | |
344 | 362 | return xpath |
345 | 363 | |
346 | 364 | def xpath_eq_function(self, xpath, function): |
405 | 423 | |
406 | 424 | >>> from pyquery import PyQuery |
407 | 425 | >>> d = PyQuery('<div><h1/><h1 class="title">title</h1></div>') |
408 | >>> d(':contains("title")') | |
426 | >>> d('h1:contains("title")') | |
409 | 427 | [<h1.title>] |
410 | 428 | |
411 | 429 | .. |
412 | 430 | """ |
413 | if function.argument_types() != ['STRING']: | |
431 | if function.argument_types() not in (['STRING'], ['IDENT']): | |
414 | 432 | raise ExpressionError( |
415 | "Expected a single string for :contains(), got %r" % ( | |
433 | "Expected a single string or ident for :contains(), got %r" % ( | |
416 | 434 | function.arguments,)) |
417 | 435 | |
418 | 436 | value = self.xpath_literal(function.arguments[0].value) |
419 | xpath.add_post_condition("contains(text(), %s)" % value) | |
420 | return xpath | |
437 | xpath.add_post_condition('contains(., %s)' % value) | |
438 | return xpath | |
439 | ||
440 | def xpath_has_function(self, xpath, function): | |
441 | """Matches elements which contain at least one element that matches | |
442 | the specified selector. https://api.jquery.com/has-selector/ | |
443 | ||
444 | >>> from pyquery import PyQuery | |
445 | >>> d = PyQuery('<div class="foo"><div class="bar"></div></div>') | |
446 | >>> d('.foo:has(".baz")') | |
447 | [] | |
448 | >>> d('.foo:has(".foo")') | |
449 | [] | |
450 | >>> d('.foo:has(".bar")') | |
451 | [<div.foo>] | |
452 | >>> d('.foo:has(div)') | |
453 | [<div.foo>] | |
454 | ||
455 | .. | |
456 | """ | |
457 | if function.argument_types() not in (['STRING'], ['IDENT']): | |
458 | raise ExpressionError( | |
459 | "Expected a single string or ident for :has(), got %r" % ( | |
460 | function.arguments,)) | |
461 | value = self.css_to_xpath( | |
462 | function.arguments[0].value, prefix='descendant::', | |
463 | ) | |
464 | xpath.add_post_condition(value) | |
465 | return xpath |
0 | 0 | # -*- coding: utf-8 -*- |
1 | import sys | |
2 | ||
3 | PY3k = sys.version_info >= (3,) | |
4 | ||
5 | if PY3k: | |
6 | from urllib.request import urlopen | |
7 | from urllib.parse import urlencode | |
8 | basestring = (str, bytes) | |
9 | else: | |
10 | from urllib2 import urlopen # NOQA | |
11 | from urllib import urlencode # NOQA | |
1 | from urllib.request import urlopen | |
2 | from urllib.parse import urlencode | |
3 | from urllib.error import HTTPError | |
12 | 4 | |
13 | 5 | try: |
14 | 6 | import requests |
16 | 8 | except ImportError: |
17 | 9 | HAS_REQUEST = False |
18 | 10 | |
11 | DEFAULT_TIMEOUT = 60 | |
12 | ||
13 | basestring = (str, bytes) | |
19 | 14 | |
20 | 15 | allowed_args = ( |
21 | 'auth', 'data', 'headers', 'verify', 'cert', 'config', 'hooks', 'proxies') | |
16 | 'auth', 'data', 'headers', 'verify', | |
17 | 'cert', 'config', 'hooks', 'proxies', 'cookies' | |
18 | ) | |
22 | 19 | |
23 | 20 | |
24 | 21 | def _query(url, method, kwargs): |
37 | 34 | url += data |
38 | 35 | data = None |
39 | 36 | |
40 | if data and PY3k: | |
37 | if data: | |
41 | 38 | data = data.encode('utf-8') |
42 | 39 | return url, data |
43 | 40 | |
44 | 41 | |
45 | 42 | def _requests(url, kwargs): |
43 | ||
46 | 44 | encoding = kwargs.get('encoding') |
47 | 45 | method = kwargs.get('method', 'get').lower() |
48 | meth = getattr(requests, str(method)) | |
46 | session = kwargs.get('session') | |
47 | if session: | |
48 | meth = getattr(session, str(method)) | |
49 | else: | |
50 | meth = getattr(requests, str(method)) | |
49 | 51 | if method == 'get': |
50 | 52 | url, data = _query(url, method, kwargs) |
51 | 53 | kw = {} |
52 | 54 | for k in allowed_args: |
53 | 55 | if k in kwargs: |
54 | 56 | kw[k] = kwargs[k] |
55 | resp = meth(url=url, **kw) | |
57 | resp = meth(url=url, timeout=kwargs.get('timeout', DEFAULT_TIMEOUT), **kw) | |
58 | if not (200 <= resp.status_code < 300): | |
59 | raise HTTPError(resp.url, resp.status_code, | |
60 | resp.reason, resp.headers, None) | |
56 | 61 | if encoding: |
57 | 62 | resp.encoding = encoding |
58 | 63 | html = resp.text |
62 | 67 | def _urllib(url, kwargs): |
63 | 68 | method = kwargs.get('method') |
64 | 69 | url, data = _query(url, method, kwargs) |
65 | return urlopen(url, data) | |
70 | return urlopen(url, data, timeout=kwargs.get('timeout', DEFAULT_TIMEOUT)) | |
66 | 71 | |
67 | 72 | |
68 | 73 | def url_opener(url, kwargs): |
0 | #-*- coding:utf-8 -*- | |
1 | # | |
2 | 0 | # Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com> |
3 | 1 | # |
4 | 2 | # Distributed under the BSD license, see LICENSE.txt |
5 | 3 | from .cssselectpatch import JQueryTranslator |
4 | from collections import OrderedDict | |
5 | from urllib.parse import urlencode | |
6 | from urllib.parse import urljoin | |
6 | 7 | from .openers import url_opener |
8 | from .text import extract_text | |
7 | 9 | from copy import deepcopy |
10 | from html import escape | |
8 | 11 | from lxml import etree |
9 | 12 | import lxml.html |
10 | 13 | import inspect |
14 | import itertools | |
11 | 15 | import types |
12 | import sys | |
13 | ||
14 | ||
15 | PY3k = sys.version_info >= (3,) | |
16 | ||
17 | if PY3k: | |
18 | from urllib.parse import urlencode | |
19 | from urllib.parse import urljoin | |
20 | basestring = (str, bytes) | |
21 | unicode = str | |
22 | else: | |
23 | from urllib import urlencode # NOQA | |
24 | from urlparse import urljoin # NOQA | |
25 | ||
26 | ||
27 | def func_globals(f): | |
28 | return f.__globals__ if PY3k else f.func_globals | |
29 | ||
30 | ||
31 | def func_code(f): | |
32 | return f.__code__ if PY3k else f.func_code | |
16 | ||
17 | basestring = (str, bytes) | |
18 | ||
19 | ||
20 | def getargspec(func): | |
21 | args = inspect.signature(func).parameters.values() | |
22 | return [p.name for p in args | |
23 | if p.kind == p.POSITIONAL_OR_KEYWORD] | |
33 | 24 | |
34 | 25 | |
35 | 26 | def with_camel_case_alias(func): |
36 | 27 | """decorator for methods who required a camelcase alias""" |
37 | 28 | _camel_case_aliases.add(func.__name__) |
38 | 29 | return func |
30 | ||
31 | ||
39 | 32 | _camel_case_aliases = set() |
40 | 33 | |
41 | 34 | |
45 | 38 | parts = list(alias.split('_')) |
46 | 39 | name = parts[0] + ''.join([p.title() for p in parts[1:]]) |
47 | 40 | func = getattr(PyQuery, alias) |
48 | f = types.FunctionType(func_code(func), func_globals(func), | |
49 | name, inspect.getargspec(func).defaults) | |
41 | f = types.FunctionType(func.__code__, func.__globals__, | |
42 | name, func.__defaults__) | |
50 | 43 | f.__doc__ = ( |
51 | 44 | 'Alias for :func:`~pyquery.pyquery.PyQuery.%s`') % func.__name__ |
52 | 45 | setattr(PyQuery, name, f.__get__(None, PyQuery)) |
98 | 91 | |
99 | 92 | |
100 | 93 | def callback(func, *args): |
101 | return func(*args[:func_code(func).co_argcount]) | |
94 | return func(*args[:func.__code__.co_argcount]) | |
102 | 95 | |
103 | 96 | |
104 | 97 | class NoDefault(object): |
105 | 98 | def __repr__(self): |
106 | 99 | """clean representation in Sphinx""" |
107 | 100 | return '<NoDefault>' |
101 | ||
108 | 102 | |
109 | 103 | no_default = NoDefault() |
110 | 104 | del NoDefault |
155 | 149 | self._base_url = None |
156 | 150 | self.parser = kwargs.pop('parser', None) |
157 | 151 | |
158 | if (len(args) >= 1 and | |
159 | (not PY3k and isinstance(args[0], basestring) or | |
160 | (PY3k and isinstance(args[0], str))) and | |
161 | args[0].split('://', 1)[0] in ('http', 'https')): | |
162 | kwargs['url'] = args[0] | |
163 | if len(args) >= 2: | |
164 | kwargs['data'] = args[1] | |
165 | args = [] | |
166 | ||
167 | 152 | if 'parent' in kwargs: |
168 | 153 | self._parent = kwargs.pop('parent') |
169 | 154 | else: |
178 | 163 | else: |
179 | 164 | self._translator = self._translator_class(xhtml=False) |
180 | 165 | |
181 | namespaces = kwargs.pop('namespaces', {}) | |
166 | self.namespaces = kwargs.pop('namespaces', None) | |
182 | 167 | |
183 | 168 | if kwargs: |
184 | 169 | # specific case to get the dom |
185 | 170 | if 'filename' in kwargs: |
186 | html = open(kwargs['filename']) | |
171 | html = open(kwargs['filename'], | |
172 | encoding=kwargs.get('encoding')) | |
187 | 173 | elif 'url' in kwargs: |
188 | 174 | url = kwargs.pop('url') |
189 | 175 | if 'opener' in kwargs: |
202 | 188 | if hasattr(html, 'close'): |
203 | 189 | try: |
204 | 190 | html.close() |
205 | except: | |
191 | except Exception: | |
206 | 192 | pass |
207 | 193 | |
208 | 194 | else: |
232 | 218 | elements = context |
233 | 219 | elif isinstance(context, etree._Element): |
234 | 220 | elements = [context] |
221 | else: | |
222 | raise TypeError(context) | |
235 | 223 | |
236 | 224 | # select nodes |
237 | 225 | if elements and selector is not no_default: |
238 | 226 | xpath = self._css_to_xpath(selector) |
239 | 227 | results = [] |
240 | 228 | for tag in elements: |
241 | results.extend(tag.xpath(xpath, namespaces=namespaces)) | |
229 | results.extend( | |
230 | tag.xpath(xpath, namespaces=self.namespaces)) | |
242 | 231 | elements = results |
243 | 232 | |
244 | 233 | list.__init__(self, elements) |
246 | 235 | def _css_to_xpath(self, selector, prefix='descendant-or-self::'): |
247 | 236 | selector = selector.replace('[@', '[') |
248 | 237 | return self._translator.css_to_xpath(selector, prefix) |
238 | ||
239 | def _copy(self, *args, **kwargs): | |
240 | kwargs.setdefault('namespaces', self.namespaces) | |
241 | return self.__class__(*args, **kwargs) | |
249 | 242 | |
250 | 243 | def __call__(self, *args, **kwargs): |
251 | 244 | """return a new PyQuery instance |
254 | 247 | if length == 0: |
255 | 248 | raise ValueError('You must provide at least a selector') |
256 | 249 | if args[0] == '': |
257 | return self.__class__([]) | |
250 | return self._copy([]) | |
258 | 251 | if (len(args) == 1 and |
259 | (not PY3k and isinstance(args[0], basestring) or | |
260 | (PY3k and isinstance(args[0], str))) and | |
252 | isinstance(args[0], str) and | |
261 | 253 | not args[0].startswith('<')): |
262 | 254 | args += (self,) |
263 | result = self.__class__(*args, parent=self, **kwargs) | |
255 | result = self._copy(*args, parent=self, **kwargs) | |
264 | 256 | return result |
265 | 257 | |
266 | 258 | # keep original list api prefixed with _ |
270 | 262 | # improve pythonic api |
271 | 263 | def __add__(self, other): |
272 | 264 | assert isinstance(other, self.__class__) |
273 | return self.__class__(self[:] + other[:]) | |
265 | return self._copy(self[:] + other[:]) | |
274 | 266 | |
275 | 267 | def extend(self, other): |
276 | """Extend with anoter PyQuery object""" | |
268 | """Extend with another PyQuery object""" | |
277 | 269 | assert isinstance(other, self.__class__) |
278 | 270 | self._extend(other[:]) |
271 | return self | |
279 | 272 | |
280 | 273 | def items(self, selector=None): |
281 | 274 | """Iter over elements. Return PyQuery objects: |
293 | 286 | else: |
294 | 287 | elems = self |
295 | 288 | for elem in elems: |
296 | yield self.__class__(elem, **dict(parent=self)) | |
289 | yield self._copy(elem, parent=self) | |
297 | 290 | |
298 | 291 | def xhtml_to_html(self): |
299 | 292 | """Remove xhtml namespace: |
341 | 334 | <script><![[CDATA[ ]></script> |
342 | 335 | |
343 | 336 | """ |
344 | if PY3k: | |
345 | return ''.join([etree.tostring(e, encoding=str) for e in self]) | |
346 | else: | |
347 | return ''.join([etree.tostring(e) for e in self]) | |
337 | return ''.join([etree.tostring(e, encoding=str) for e in self]) | |
348 | 338 | |
349 | 339 | def __unicode__(self): |
350 | 340 | """xml representation of current nodes""" |
351 | return unicode('').join([etree.tostring(e, encoding=unicode) | |
352 | for e in self]) | |
341 | return u''.join([etree.tostring(e, encoding=str) | |
342 | for e in self]) | |
353 | 343 | |
354 | 344 | def __html__(self): |
355 | 345 | """html representation of current nodes:: |
360 | 350 | <script><![[CDATA[ ]></script> |
361 | 351 | |
362 | 352 | """ |
363 | return unicode('').join([lxml.html.tostring(e, encoding=unicode) | |
364 | for e in self]) | |
353 | return u''.join([lxml.html.tostring(e, encoding=str) | |
354 | for e in self]) | |
365 | 355 | |
366 | 356 | def __repr__(self): |
367 | 357 | r = [] |
374 | 364 | r.append('<%s%s%s>' % (el.tag, id, c)) |
375 | 365 | return '[' + (', '.join(r)) + ']' |
376 | 366 | except AttributeError: |
377 | if PY3k: | |
378 | return list.__repr__(self) | |
379 | else: | |
380 | for el in self: | |
381 | if isinstance(el, unicode): | |
382 | r.append(el.encode('utf-8')) | |
383 | else: | |
384 | r.append(el) | |
385 | return repr(r) | |
367 | return list.__repr__(self) | |
386 | 368 | |
387 | 369 | @property |
388 | 370 | def root(self): |
389 | 371 | """return the xml root element |
390 | 372 | """ |
391 | 373 | if self._parent is not no_default: |
392 | return self._parent.getroottree() | |
374 | return self._parent[0].getroottree() | |
393 | 375 | return self[0].getroottree() |
394 | 376 | |
395 | 377 | @property |
414 | 396 | xpath = self._css_to_xpath(selector, 'self::') |
415 | 397 | results = [] |
416 | 398 | for tag in elements: |
417 | results.extend(tag.xpath(xpath)) | |
399 | results.extend(tag.xpath(xpath, namespaces=self.namespaces)) | |
418 | 400 | if reverse: |
419 | 401 | results.reverse() |
420 | 402 | if unique: |
421 | 403 | result_list = results |
422 | 404 | results = [] |
423 | 405 | for item in result_list: |
424 | if not item in results: | |
406 | if item not in results: | |
425 | 407 | results.append(item) |
426 | return self.__class__(results, **dict(parent=self)) | |
408 | return self._copy(results, parent=self) | |
427 | 409 | |
428 | 410 | def parent(self, selector=None): |
429 | 411 | return self._filter_only( |
474 | 456 | """ |
475 | 457 | return self._filter_only(selector, self._next_all()) |
476 | 458 | |
459 | @with_camel_case_alias | |
460 | def next_until(self, selector, filter_=None): | |
461 | """ | |
462 | >>> h = ''' | |
463 | ... <h2>Greeting 1</h2> | |
464 | ... <p>Hello!</p><p>World!</p> | |
465 | ... <h2>Greeting 2</h2><p>Bye!</p> | |
466 | ... ''' | |
467 | >>> d = PyQuery(h) | |
468 | >>> d('h2:first').nextUntil('h2') | |
469 | [<p>, <p>] | |
470 | """ | |
471 | return self._filter_only( | |
472 | filter_, [ | |
473 | e | |
474 | for q in itertools.takewhile( | |
475 | lambda q: not q.is_(selector), self.next_all().items()) | |
476 | for e in q | |
477 | ] | |
478 | ) | |
479 | ||
477 | 480 | def _prev_all(self): |
478 | 481 | return [e for e in self._traverse('getprevious')] |
479 | 482 | |
547 | 550 | result = [] |
548 | 551 | for current in self: |
549 | 552 | while (current is not None and |
550 | not self.__class__(current).is_(selector)): | |
553 | not self._copy(current).is_(selector)): | |
551 | 554 | current = current.getparent() |
552 | 555 | if current is not None: |
553 | 556 | result.append(current) |
554 | return self.__class__(result, **dict(parent=self)) | |
557 | return self._copy(result, parent=self) | |
555 | 558 | |
556 | 559 | def contents(self): |
557 | 560 | """ |
563 | 566 | """ |
564 | 567 | results = [] |
565 | 568 | for elem in self: |
566 | results.extend(elem.xpath('child::text()|child::*')) | |
567 | return self.__class__(results, **dict(parent=self)) | |
569 | results.extend(elem.xpath('child::text()|child::*', | |
570 | namespaces=self.namespaces)) | |
571 | return self._copy(results, parent=self) | |
568 | 572 | |
569 | 573 | def filter(self, selector): |
570 | 574 | """Filter elements in self using selector (string or function): |
585 | 589 | return self._filter_only(selector, self) |
586 | 590 | else: |
587 | 591 | elements = [] |
588 | args = inspect.getargspec(callback).args | |
592 | args = getargspec(callback) | |
589 | 593 | try: |
590 | 594 | for i, this in enumerate(self): |
591 | 595 | if len(args) == 1: |
592 | func_globals(selector)['this'] = this | |
596 | selector.__globals__['this'] = this | |
593 | 597 | if callback(selector, i, this): |
594 | 598 | elements.append(this) |
595 | 599 | finally: |
596 | f_globals = func_globals(selector) | |
600 | f_globals = selector.__globals__ | |
597 | 601 | if 'this' in f_globals: |
598 | 602 | del f_globals['this'] |
599 | return self.__class__(elements, **dict(parent=self)) | |
603 | return self._copy(elements, parent=self) | |
600 | 604 | |
601 | 605 | def not_(self, selector): |
602 | 606 | """Return elements that don't match the given selector: |
605 | 609 | >>> d('p').not_('.hello') |
606 | 610 | [<p>] |
607 | 611 | """ |
608 | exclude = set(self.__class__(selector, self)) | |
609 | return self.__class__([e for e in self if e not in exclude], | |
610 | **dict(parent=self)) | |
612 | exclude = set(self._copy(selector, self)) | |
613 | return self._copy([e for e in self if e not in exclude], | |
614 | parent=self) | |
611 | 615 | |
612 | 616 | def is_(self, selector): |
613 | 617 | """Returns True if selector matches at least one current element, else |
638 | 642 | [<em>] |
639 | 643 | """ |
640 | 644 | xpath = self._css_to_xpath(selector) |
641 | results = [child.xpath(xpath) for tag in self | |
645 | results = [child.xpath(xpath, namespaces=self.namespaces) | |
646 | for tag in self | |
642 | 647 | for child in tag.getchildren()] |
643 | 648 | # Flatten the results |
644 | 649 | elements = [] |
645 | 650 | for r in results: |
646 | 651 | elements.extend(r) |
647 | return self.__class__(elements, **dict(parent=self)) | |
652 | return self._copy(elements, parent=self) | |
648 | 653 | |
649 | 654 | def eq(self, index): |
650 | 655 | """Return PyQuery of only the element with the provided index:: |
659 | 664 | |
660 | 665 | .. |
661 | 666 | """ |
662 | # Use slicing to silently handle out of bounds indexes | |
663 | items = self[index:index + 1] | |
664 | return self.__class__(items, **dict(parent=self)) | |
667 | # Slicing will return empty list when index=-1 | |
668 | # we should handle out of bound by ourselves | |
669 | try: | |
670 | items = self[index] | |
671 | except IndexError: | |
672 | items = [] | |
673 | return self._copy(items, parent=self) | |
665 | 674 | |
666 | 675 | def each(self, func): |
667 | 676 | """apply func on each nodes |
668 | 677 | """ |
669 | 678 | try: |
670 | 679 | for i, element in enumerate(self): |
671 | func_globals(func)['this'] = element | |
680 | func.__globals__['this'] = element | |
672 | 681 | if callback(func, i, element) is False: |
673 | 682 | break |
674 | 683 | finally: |
675 | f_globals = func_globals(func) | |
684 | f_globals = func.__globals__ | |
676 | 685 | if 'this' in f_globals: |
677 | 686 | del f_globals['this'] |
678 | 687 | return self |
697 | 706 | items = [] |
698 | 707 | try: |
699 | 708 | for i, element in enumerate(self): |
700 | func_globals(func)['this'] = element | |
709 | func.__globals__['this'] = element | |
701 | 710 | result = callback(func, i, element) |
702 | 711 | if result is not None: |
703 | 712 | if not isinstance(result, list): |
705 | 714 | else: |
706 | 715 | items.extend(result) |
707 | 716 | finally: |
708 | f_globals = func_globals(func) | |
717 | f_globals = func.__globals__ | |
709 | 718 | if 'this' in f_globals: |
710 | 719 | del f_globals['this'] |
711 | return self.__class__(items, **dict(parent=self)) | |
720 | return self._copy(items, parent=self) | |
712 | 721 | |
713 | 722 | @property |
714 | 723 | def length(self): |
759 | 768 | tag.set(key, value) |
760 | 769 | elif value is no_default: |
761 | 770 | return self[0].get(attr) |
762 | elif value is None or value == '': | |
771 | elif value is None: | |
763 | 772 | return self.remove_attr(attr) |
764 | 773 | else: |
765 | 774 | for tag in self: |
923 | 932 | # CORE UI EFFECTS # |
924 | 933 | ################### |
925 | 934 | def hide(self): |
926 | """remove display:none to elements style | |
935 | """Add display:none to elements style: | |
927 | 936 | |
928 | 937 | >>> print(PyQuery('<div style="display:none;"/>').hide()) |
929 | 938 | <div style="display: none"/> |
932 | 941 | return self.css('display', 'none') |
933 | 942 | |
934 | 943 | def show(self): |
935 | """add display:block to elements style | |
944 | """Add display:block to elements style: | |
936 | 945 | |
937 | 946 | >>> print(PyQuery('<div />').show()) |
938 | 947 | <div style="display: block"/> |
955 | 964 | >>> d.val() |
956 | 965 | 'Youhou' |
957 | 966 | |
958 | """ | |
959 | return self.attr('value', value) or None | |
967 | Set the selected values for a `select` element with the `multiple` | |
968 | attribute:: | |
969 | ||
970 | >>> d = PyQuery(''' | |
971 | ... <select multiple> | |
972 | ... <option value="you"><option value="hou"> | |
973 | ... </select> | |
974 | ... ''') | |
975 | >>> d.val(['you', 'hou']) | |
976 | [<select>] | |
977 | ||
978 | Get the selected values for a `select` element with the `multiple` | |
979 | attribute:: | |
980 | ||
981 | >>> d.val() | |
982 | ['you', 'hou'] | |
983 | ||
984 | """ | |
985 | def _get_value(tag): | |
986 | # <textarea> | |
987 | if tag.tag == 'textarea': | |
988 | return self._copy(tag).html() | |
989 | # <select> | |
990 | elif tag.tag == 'select': | |
991 | if 'multiple' in tag.attrib: | |
992 | # Only extract value if selected | |
993 | selected = self._copy(tag)('option[selected]') | |
994 | # Rebuild list to avoid serialization error | |
995 | return list(selected.map( | |
996 | lambda _, o: self._copy(o).attr('value') | |
997 | )) | |
998 | selected_option = self._copy(tag)('option[selected]:last') | |
999 | if selected_option: | |
1000 | return selected_option.attr('value') | |
1001 | else: | |
1002 | return self._copy(tag)('option').attr('value') | |
1003 | # <input type="checkbox"> or <input type="radio"> | |
1004 | elif self.is_(':checkbox,:radio'): | |
1005 | val = self._copy(tag).attr('value') | |
1006 | if val is None: | |
1007 | return 'on' | |
1008 | else: | |
1009 | return val | |
1010 | # <input> | |
1011 | elif tag.tag == 'input': | |
1012 | val = self._copy(tag).attr('value') | |
1013 | return val.replace('\n', '') if val else '' | |
1014 | # everything else. | |
1015 | return self._copy(tag).attr('value') or '' | |
1016 | ||
1017 | def _set_value(pq, value): | |
1018 | for tag in pq: | |
1019 | # <select> | |
1020 | if tag.tag == 'select': | |
1021 | if not isinstance(value, list): | |
1022 | value = [value] | |
1023 | ||
1024 | def _make_option_selected(_, elem): | |
1025 | pq = self._copy(elem) | |
1026 | if pq.attr('value') in value: | |
1027 | pq.attr('selected', 'selected') | |
1028 | if 'multiple' not in tag.attrib: | |
1029 | del value[:] # Ensure it toggles first match | |
1030 | else: | |
1031 | pq.removeAttr('selected') | |
1032 | ||
1033 | self._copy(tag)('option').each(_make_option_selected) | |
1034 | continue | |
1035 | # Stringify array | |
1036 | if isinstance(value, list): | |
1037 | value = ','.join(value) | |
1038 | # <textarea> | |
1039 | if tag.tag == 'textarea': | |
1040 | self._copy(tag).text(value) | |
1041 | continue | |
1042 | # <input> and everything else. | |
1043 | self._copy(tag).attr('value', value) | |
1044 | ||
1045 | if value is no_default: | |
1046 | if len(self): | |
1047 | return _get_value(self[0]) | |
1048 | else: | |
1049 | _set_value(self, value) | |
1050 | return self | |
960 | 1051 | |
961 | 1052 | def html(self, value=no_default, **kwargs): |
962 | 1053 | """Get or set the html representation of sub nodes. |
987 | 1078 | return None |
988 | 1079 | tag = self[0] |
989 | 1080 | children = tag.getchildren() |
1081 | html = escape(tag.text or '', quote=False) | |
990 | 1082 | if not children: |
991 | return tag.text | |
992 | html = tag.text or '' | |
1083 | return html | |
993 | 1084 | if 'encoding' not in kwargs: |
994 | kwargs['encoding'] = unicode | |
995 | html += unicode('').join([etree.tostring(e, **kwargs) | |
996 | for e in children]) | |
1085 | kwargs['encoding'] = str | |
1086 | html += u''.join([etree.tostring(e, **kwargs) | |
1087 | for e in children]) | |
997 | 1088 | return html |
998 | 1089 | else: |
999 | 1090 | if isinstance(value, self.__class__): |
1000 | new_html = unicode(value) | |
1091 | new_html = str(value) | |
1001 | 1092 | elif isinstance(value, basestring): |
1002 | 1093 | new_html = value |
1003 | 1094 | elif not value: |
1009 | 1100 | for child in tag.getchildren(): |
1010 | 1101 | tag.remove(child) |
1011 | 1102 | root = fromstring( |
1012 | unicode('<root>') + new_html + unicode('</root>'), | |
1103 | u'<root>' + new_html + u'</root>', | |
1013 | 1104 | self.parser)[0] |
1014 | 1105 | children = root.getchildren() |
1015 | 1106 | if children: |
1016 | 1107 | tag.extend(children) |
1017 | 1108 | tag.text = root.text |
1018 | tag.tail = root.tail | |
1019 | return self | |
1020 | ||
1021 | @with_camel_case_alias | |
1022 | def outer_html(self): | |
1109 | return self | |
1110 | ||
1111 | @with_camel_case_alias | |
1112 | def outer_html(self, method="html"): | |
1023 | 1113 | """Get the html representation of the first selected element:: |
1024 | 1114 | |
1025 | 1115 | >>> d = PyQuery('<div><span class="red">toto</span> rocks</div>') |
1043 | 1133 | if e0.tail: |
1044 | 1134 | e0 = deepcopy(e0) |
1045 | 1135 | e0.tail = '' |
1046 | return lxml.html.tostring(e0, encoding=unicode) | |
1047 | ||
1048 | def text(self, value=no_default): | |
1136 | return etree.tostring(e0, encoding=str, method=method) | |
1137 | ||
1138 | def text(self, value=no_default, **kwargs): | |
1049 | 1139 | """Get or set the text representation of sub nodes. |
1050 | 1140 | |
1051 | 1141 | Get the text value:: |
1052 | 1142 | |
1053 | 1143 | >>> doc = PyQuery('<div><span>toto</span><span>tata</span></div>') |
1054 | 1144 | >>> print(doc.text()) |
1145 | tototata | |
1146 | >>> doc = PyQuery('''<div><span>toto</span> | |
1147 | ... <span>tata</span></div>''') | |
1148 | >>> print(doc.text()) | |
1055 | 1149 | toto tata |
1150 | ||
1151 | Get the text value, without squashing newlines:: | |
1152 | ||
1153 | >>> doc = PyQuery('''<div><span>toto</span> | |
1154 | ... <span>tata</span></div>''') | |
1155 | >>> print(doc.text(squash_space=False)) | |
1156 | toto | |
1157 | tata | |
1056 | 1158 | |
1057 | 1159 | Set the text value:: |
1058 | 1160 | |
1066 | 1168 | if value is no_default: |
1067 | 1169 | if not self: |
1068 | 1170 | return '' |
1069 | ||
1070 | text = [] | |
1071 | ||
1072 | def add_text(tag, no_tail=False): | |
1073 | if tag.text and not isinstance(tag, lxml.etree._Comment): | |
1074 | text.append(tag.text) | |
1075 | for child in tag.getchildren(): | |
1076 | add_text(child) | |
1077 | if not no_tail and tag.tail: | |
1078 | text.append(tag.tail) | |
1079 | ||
1080 | for tag in self: | |
1081 | add_text(tag, no_tail=True) | |
1082 | return ' '.join([t.strip() for t in text if t.strip()]) | |
1171 | return ' '.join( | |
1172 | self._copy(tag).html() if tag.tag == 'textarea' else | |
1173 | extract_text(tag, **kwargs) for tag in self | |
1174 | ) | |
1083 | 1175 | |
1084 | 1176 | for tag in self: |
1085 | 1177 | for child in tag.getchildren(): |
1093 | 1185 | |
1094 | 1186 | def _get_root(self, value): |
1095 | 1187 | if isinstance(value, basestring): |
1096 | root = fromstring(unicode('<root>') + value + unicode('</root>'), | |
1188 | root = fromstring(u'<root>' + value + u'</root>', | |
1097 | 1189 | self.parser)[0] |
1098 | 1190 | elif isinstance(value, etree._Element): |
1099 | root = self.__class__(value) | |
1191 | root = self._copy(value) | |
1100 | 1192 | elif isinstance(value, PyQuery): |
1101 | 1193 | root = value |
1102 | 1194 | else: |
1125 | 1217 | if i > 0: |
1126 | 1218 | root = deepcopy(list(root)) |
1127 | 1219 | tag.extend(root) |
1128 | root = tag[-len(root):] | |
1129 | 1220 | return self |
1130 | 1221 | |
1131 | 1222 | @with_camel_case_alias |
1299 | 1390 | |
1300 | 1391 | @with_camel_case_alias |
1301 | 1392 | def replace_with(self, value): |
1302 | """replace nodes by value:: | |
1393 | """replace nodes by value: | |
1303 | 1394 | |
1304 | 1395 | >>> doc = PyQuery("<html><div /></html>") |
1305 | 1396 | >>> node = PyQuery("<span />") |
1314 | 1405 | value = str(value) |
1315 | 1406 | if hasattr(value, '__call__'): |
1316 | 1407 | for i, element in enumerate(self): |
1317 | self.__class__(element).before( | |
1408 | self._copy(element).before( | |
1318 | 1409 | value(i, element) + (element.tail or '')) |
1319 | 1410 | parent = element.getparent() |
1320 | 1411 | parent.remove(element) |
1321 | 1412 | else: |
1322 | 1413 | for tag in self: |
1323 | self.__class__(tag).before(value + (tag.tail or '')) | |
1414 | self._copy(tag).before(value + (tag.tail or '')) | |
1324 | 1415 | parent = tag.getparent() |
1325 | 1416 | parent.remove(tag) |
1326 | 1417 | return self |
1351 | 1442 | def remove(self, expr=no_default): |
1352 | 1443 | """Remove nodes: |
1353 | 1444 | |
1354 | >>> h = '<div>Maybe <em>she</em> does <strong>NOT</strong> know</div>' | |
1355 | >>> d = PyQuery(h) | |
1356 | >>> d('strong').remove() | |
1357 | [<strong>] | |
1358 | >>> print(d) | |
1359 | <div>Maybe <em>she</em> does know</div> | |
1445 | >>> h = ( | |
1446 | ... '<div>Maybe <em>she</em> does <strong>NOT</strong> know</div>' | |
1447 | ... ) | |
1448 | >>> d = PyQuery(h) | |
1449 | >>> d('strong').remove() | |
1450 | [<strong>] | |
1451 | >>> print(d) | |
1452 | <div>Maybe <em>she</em> does know</div> | |
1360 | 1453 | """ |
1361 | 1454 | if expr is no_default: |
1362 | 1455 | for tag in self: |
1367 | 1460 | if prev is None: |
1368 | 1461 | if not parent.text: |
1369 | 1462 | parent.text = '' |
1370 | parent.text += ' ' + tag.tail | |
1463 | parent.text += tag.tail | |
1371 | 1464 | else: |
1372 | 1465 | if not prev.tail: |
1373 | 1466 | prev.tail = '' |
1374 | prev.tail += ' ' + tag.tail | |
1467 | prev.tail += tag.tail | |
1375 | 1468 | parent.remove(tag) |
1376 | 1469 | else: |
1377 | results = self.__class__(expr, self) | |
1470 | results = self._copy(expr, self) | |
1378 | 1471 | results.remove() |
1379 | 1472 | return self |
1380 | 1473 | |
1393 | 1486 | """ |
1394 | 1487 | def __setattr__(self, name, func): |
1395 | 1488 | def fn(self, *args, **kwargs): |
1396 | func_globals(func)['this'] = self | |
1489 | func.__globals__['this'] = self | |
1397 | 1490 | return func(*args, **kwargs) |
1398 | 1491 | fn.__name__ = name |
1399 | 1492 | setattr(PyQuery, name, fn) |
1400 | 1493 | fn = Fn() |
1401 | 1494 | |
1495 | ######## | |
1496 | # AJAX # | |
1497 | ######## | |
1498 | ||
1499 | @with_camel_case_alias | |
1500 | def serialize_array(self): | |
1501 | """Serialize form elements as an array of dictionaries, whose structure | |
1502 | mirrors that produced by the jQuery API. Notably, it does not handle | |
1503 | the deprecated `keygen` form element. | |
1504 | ||
1505 | >>> d = PyQuery('<form><input name="order" value="spam"></form>') | |
1506 | >>> d.serialize_array() == [{'name': 'order', 'value': 'spam'}] | |
1507 | True | |
1508 | >>> d.serializeArray() == [{'name': 'order', 'value': 'spam'}] | |
1509 | True | |
1510 | """ | |
1511 | return list(map( | |
1512 | lambda p: {'name': p[0], 'value': p[1]}, | |
1513 | self.serialize_pairs() | |
1514 | )) | |
1515 | ||
    def serialize(self):
        """Serialize form elements as a URL-encoded string.

        >>> h = (
        ...     '<form><input name="order" value="spam">'
        ...     '<input name="order2" value="baked beans"></form>'
        ... )
        >>> d = PyQuery(h)
        >>> d.serialize()
        'order=spam&order2=baked%20beans'
        """
        # urlencode() emits '+' for spaces (application/x-www-form-urlencoded);
        # jQuery's .serialize() uses '%20', so post-process to match (see the
        # doctest above).
        return urlencode(self.serialize_pairs()).replace('+', '%20')
1528 | ||
1402 | 1529 | ##################################################### |
1403 | 1530 | # Additional methods that are not in the jQuery API # |
1404 | 1531 | ##################################################### |
1532 | ||
    @with_camel_case_alias
    def serialize_pairs(self):
        """Serialize form elements as an array of 2-tuples conventional for
        typical URL-parsing operations in Python.

        >>> d = PyQuery('<form><input name="order" value="spam"></form>')
        >>> d.serialize_pairs()
        [('order', 'spam')]
        >>> d.serializePairs()
        [('order', 'spam')]
        """
        # Control types that never get serialized (mirrors jQuery's rTypes):
        # https://github.com/jquery/jquery/blob
        # /2d4f53416e5f74fa98e0c1d66b6f3c285a12f0ce/src/serialize.js#L14
        _submitter_types = ['submit', 'button', 'image', 'reset', 'file']

        controls = self._copy([])
        # Expand list of form controls: a <form> expands to its owned inputs,
        # a <fieldset> to its contained inputs, anything else stands for itself.
        for el in self.items():
            if el[0].tag == 'form':
                form_id = el.attr('id')
                if form_id:
                    # Include inputs outside of their form owner
                    # (i.e. elements carrying form="<id>" anywhere in the doc).
                    root = self._copy(el.root.getroot())
                    controls.extend(root(
                        '#%s :not([form]):input, [form="%s"]:input'
                        % (form_id, form_id)))
                else:
                    controls.extend(el(':not([form]):input'))
            elif el[0].tag == 'fieldset':
                controls.extend(el(':input'))
            else:
                controls.extend(el)
        # Filter controls: must be named and enabled, and not a submitter type.
        selector = '[name]:enabled:not(button)'  # Not serializing image button
        selector += ''.join(map(
            lambda s: ':not([type="%s"])' % s,
            _submitter_types))
        controls = controls.filter(selector)

        # Checkboxes/radios only contribute a value when checked.
        def _filter_out_unchecked(_, el):
            el = controls._copy(el)
            return not el.is_(':checkbox:not(:checked)') and \
                not el.is_(':radio:not(:checked)')
        controls = controls.filter(_filter_out_unchecked)

        # jQuery serializes inputs with the datalist element as an ancestor
        # contrary to WHATWG spec as of August 2018
        #
        # xpath = 'self::*[not(ancestor::datalist)]'
        # results = []
        # for tag in controls:
        #     results.extend(tag.xpath(xpath, namespaces=controls.namespaces))
        # controls = controls._copy(results)

        # Serialize values; a multi-select yields one pair per selected option.
        # Bare '\n' is normalized to '\r\n' as in form submission.
        ret = []
        for field in controls:
            val = self._copy(field).val() or ''
            if isinstance(val, list):
                ret.extend(map(
                    lambda v: (field.attrib['name'], v.replace('\n', '\r\n')),
                    val
                ))
            else:
                ret.append((field.attrib['name'], val.replace('\n', '\r\n')))
        return ret
1599 | ||
1600 | @with_camel_case_alias | |
1601 | def serialize_dict(self): | |
1602 | """Serialize form elements as an ordered dictionary. Multiple values | |
1603 | corresponding to the same input name are concatenated into one list. | |
1604 | ||
1605 | >>> d = PyQuery('''<form> | |
1606 | ... <input name="order" value="spam"> | |
1607 | ... <input name="order" value="eggs"> | |
1608 | ... <input name="order2" value="ham"> | |
1609 | ... </form>''') | |
1610 | >>> d.serialize_dict() | |
1611 | OrderedDict([('order', ['spam', 'eggs']), ('order2', 'ham')]) | |
1612 | >>> d.serializeDict() | |
1613 | OrderedDict([('order', ['spam', 'eggs']), ('order2', 'ham')]) | |
1614 | """ | |
1615 | ret = OrderedDict() | |
1616 | for name, val in self.serialize_pairs(): | |
1617 | if name not in ret: | |
1618 | ret[name] = val | |
1619 | elif not isinstance(ret[name], list): | |
1620 | ret[name] = [ret[name], val] | |
1621 | else: | |
1622 | ret[name].append(val) | |
1623 | return ret | |
1405 | 1624 | |
1406 | 1625 | @property |
1407 | 1626 | def base_url(self): |
1422 | 1641 | 'You need a base URL to make your links' |
1423 | 1642 | 'absolute. It can be provided by the base_url parameter.')) |
1424 | 1643 | |
1425 | def repl(i, e): | |
1426 | return self(e).attr( | |
1427 | 'href', | |
1428 | urljoin(base_url, self(e).attr('href'))) | |
1429 | ||
1430 | self('a').each(repl) | |
1431 | return self | |
1644 | def repl(attr): | |
1645 | def rep(i, e): | |
1646 | attr_value = self(e).attr(attr) | |
1647 | # when label hasn't such attr, pass | |
1648 | if attr_value is None: | |
1649 | return None | |
1650 | ||
1651 | # skip specific "protocol" schemas | |
1652 | if any(attr_value.startswith(schema) | |
1653 | for schema in ('tel:', 'callto:', 'sms:')): | |
1654 | return None | |
1655 | ||
1656 | return self(e).attr(attr, | |
1657 | urljoin(base_url, attr_value.strip())) | |
1658 | return rep | |
1659 | ||
1660 | self('a').each(repl('href')) | |
1661 | self('link').each(repl('href')) | |
1662 | self('script').each(repl('src')) | |
1663 | self('img').each(repl('src')) | |
1664 | self('iframe').each(repl('src')) | |
1665 | self('form').each(repl('action')) | |
1666 | ||
1667 | return self | |
1668 | ||
1432 | 1669 | |
1433 | 1670 | build_camel_case_aliases(PyQuery) |
# -*- coding: utf-8 -*-
# Optional integration with the Deliverance theming toolkit.  The whole
# module is a no-op unless ``deliverance`` is importable.
try:
    from deliverance.pyref import PyReference
    from deliverance import rules
    # NOTE(review): py2-style implicit relative import; on python3 this would
    # need to be ``from .ajax import ...`` -- confirm this module only targets
    # legacy setups (pyquery.ajax/pyquery.rules were removed upstream in 1.3.0).
    from ajax import PyQuery as pq
except ImportError:
    # Deliverance is an optional dependency: silently skip the integration.
    pass
else:
    class PyQuery(rules.AbstractAction):
        """Deliverance rule action that runs a referenced Python function."""
        name = 'py'

        def __init__(self, source_location, pyref):
            self.source_location = source_location
            self.pyref = pyref

        def apply(self, content_doc, theme_doc, resource_fetcher, log):
            # Wrap the raw documents in PyQuery before calling the
            # user-supplied transform.
            self.pyref(pq([content_doc]), pq([theme_doc]), resource_fetcher, log)

        @classmethod
        def from_xml(cls, el, source_location):
            """Parses and instantiates the class from an element"""
            pyref = PyReference.parse_xml(
                el, source_location=source_location,
                default_function='transform')
            return cls(source_location, pyref)

    # Register the action so Deliverance rule files can use <pyquery ...>.
    rules._actions['pyquery'] = PyQuery

def deliverance_proxy():
    # Thin CLI entry point delegating to Deliverance's proxy command.
    import deliverance.proxycommand
    deliverance.proxycommand.main()
0 | import re | |
1 | ||
2 | ||
# Tags whose text flows inline with their siblings (no line break around
# them when rendered):
# https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements#Elements
INLINE_TAGS = {
    'a', 'abbr', 'acronym', 'b', 'bdo', 'big', 'br', 'button', 'cite',
    'code', 'dfn', 'em', 'i', 'img', 'input', 'kbd', 'label', 'map',
    'object', 'q', 'samp', 'script', 'select', 'small', 'span', 'strong',
    'sub', 'sup', 'textarea', 'time', 'tt', 'var'
}

# Tags that act as explicit line separators in extracted text.
SEPARATORS = {'br'}
13 | ||
# Runs of the characters HTML treats as whitespace (space, tab, form feed,
# zero-width space, LF, CR):
# https://www.w3.org/TR/html4/struct/text.html#h-9.1
WHITESPACE_RE = re.compile(u'[\x20\x09\x0C\u200B\x0A\x0D]+')


def squash_html_whitespace(text):
    """Collapse each run of HTML whitespace in *text* into a single space.

    Not meant for preformatted content (e.g. <pre>, or content made
    preformatted by CSS rules), which should keep its raw text.
    """
    return WHITESPACE_RE.sub(' ', text)
24 | ||
25 | ||
26 | def _squash_artifical_nl(parts): | |
27 | output, last_nl = [], False | |
28 | for x in parts: | |
29 | if x is not None: | |
30 | output.append(x) | |
31 | last_nl = False | |
32 | elif not last_nl: | |
33 | output.append(None) | |
34 | last_nl = True | |
35 | return output | |
36 | ||
37 | ||
38 | def _strip_artifical_nl(parts): | |
39 | if not parts: | |
40 | return parts | |
41 | for start_idx, pt in enumerate(parts): | |
42 | if isinstance(pt, str): | |
43 | # 0, 1, 2, index of first string [start_idx:... | |
44 | break | |
45 | iterator = enumerate(parts[:start_idx - 1 if start_idx > 0 else None:-1]) | |
46 | for end_idx, pt in iterator: | |
47 | if isinstance(pt, str): # 0=None, 1=-1, 2=-2, index of last string | |
48 | break | |
49 | return parts[start_idx:-end_idx if end_idx > 0 else None] | |
50 | ||
51 | ||
def _merge_original_parts(parts):
    """Join adjacent raw text fragments into single cleaned-up strings.

    Each run of consecutive string entries is concatenated,
    whitespace-squashed and stripped; runs that come out empty are dropped.
    Marker entries (``None`` / ``True``) are kept as-is between the merged
    strings.
    """
    merged = []
    pending = []

    def _flush_pending():
        # Fold the buffered text run into one normalized string, if any.
        if not pending:
            return
        text = squash_html_whitespace(''.join(pending)).strip()
        if text:
            merged.append(text)
        del pending[:]

    for part in parts:
        if isinstance(part, str):
            pending.append(part)
        else:
            _flush_pending()
            merged.append(part)
    _flush_pending()
    return merged
70 | ||
71 | ||
def extract_text_array(dom, squash_artifical_nl=True, strip_artifical_nl=True):
    """Recursively flatten *dom* into a list of text fragments and markers.

    The returned list mixes three kinds of entries, in document order:

    * plain strings -- raw ``text``/``tail`` content;
    * ``True`` -- an explicit separator element (``SEPARATORS``, i.e. <br>);
    * ``None`` -- an artificial newline inserted around non-inline
      (block-level) elements.

    ``squash_artifical_nl``/``strip_artifical_nl`` normalize the markers of
    the result; recursive calls disable both so normalization happens only
    once, over the fully assembled list.
    """
    # A callable tag means a non-element node (presumably an lxml comment or
    # processing instruction -- TODO confirm); it contributes no text.
    if callable(dom.tag):
        return ''
    r = []
    if dom.tag in SEPARATORS:
        r.append(True)  # equivalent of '\n' used to designate separators
    elif dom.tag not in INLINE_TAGS:
        # equivalent of '\n' used to designate artificially inserted newlines
        r.append(None)
    if dom.text is not None:
        r.append(dom.text)
    for child in dom.getchildren():
        r.extend(extract_text_array(child, squash_artifical_nl=False,
                                    strip_artifical_nl=False))
        # Text following the child element (its tail) belongs to *dom*.
        if child.tail is not None:
            r.append(child.tail)
    if dom.tag not in INLINE_TAGS and dom.tag not in SEPARATORS:
        # equivalent of '\n' used to designate artificially inserted newlines
        r.append(None)
    if squash_artifical_nl:
        r = _squash_artifical_nl(r)
    if strip_artifical_nl:
        r = _strip_artifical_nl(r)
    return r
96 | ||
97 | ||
def extract_text(dom, block_symbol='\n', sep_symbol='\n', squash_space=True):
    """Render the text content of *dom* as a single string.

    ``block_symbol`` replaces the artificial newlines inserted around
    block-level elements; ``sep_symbol`` replaces explicit separators
    (<br>).  With ``squash_space`` (the default) whitespace runs are
    collapsed and the result is stripped, approximating on-screen rendering.
    """
    parts = extract_text_array(dom, squash_artifical_nl=squash_space)
    if squash_space:
        parts = _merge_original_parts(parts)
        parts = _squash_artifical_nl(parts)
        parts = _strip_artifical_nl(parts)
    rendered = []
    for part in parts:
        if part is None:
            rendered.append(block_symbol)
        elif part is True:
            rendered.append(sep_symbol)
        else:
            rendered.append(part)
    result = ''.join(rendered)
    return result.strip() if squash_space else result
0 | Metadata-Version: 1.1 | |
0 | Metadata-Version: 2.1 | |
1 | 1 | Name: pyquery |
2 | Version: 1.2.9 | |
2 | Version: 2.0.0.dev0 | |
3 | 3 | Summary: A jquery-like library for python |
4 | 4 | Home-page: https://github.com/gawel/pyquery |
5 | Author: Gael Pasgrimaud | |
6 | Author-email: gael@gawel.org | |
5 | Author: Olivier Lauzanne | |
6 | Author-email: olauzanne@gmail.com | |
7 | Maintainer: Gael Pasgrimaud | |
8 | Maintainer-email: gael@gawel.org | |
7 | 9 | License: BSD |
8 | Description: | |
9 | pyquery: a jquery-like library for python | |
10 | ========================================= | |
11 | ||
12 | pyquery allows you to make jquery queries on xml documents. | |
13 | The API is as similar as possible to jquery. pyquery uses lxml for fast
14 | xml and html manipulation. | |
15 | ||
16 | This is not (or at least not yet) a library to produce or interact with | |
17 | javascript code. I just liked the jquery API and I missed it in python so I | |
18 | told myself "Hey let's make jquery in python". This is the result. | |
19 | ||
20 | The `project`_ is being actively developed on a git repository on GitHub. I
21 | have the policy of giving push access to anyone who wants it and then to review
22 | what they do. So if you want to contribute just email me.
23 | ||
24 | Please report bugs on the `github | |
25 | <https://github.com/gawel/pyquery/issues>`_ issue | |
26 | tracker. | |
27 | ||
28 | .. _deliverance: http://www.gawel.org/weblog/en/2008/12/skinning-with-pyquery-and-deliverance | |
29 | .. _project: https://github.com/gawel/pyquery/ | |
30 | ||
31 | Quickstart | |
32 | ========== | |
33 | ||
34 | You can use the PyQuery class to load an xml document from a string, a lxml | |
35 | document, from a file or from an url:: | |
36 | ||
37 | >>> from pyquery import PyQuery as pq | |
38 | >>> from lxml import etree | |
39 | >>> import urllib | |
40 | >>> d = pq("<html></html>") | |
41 | >>> d = pq(etree.fromstring("<html></html>")) | |
42 | >>> d = pq(url=your_url) | |
43 | >>> d = pq(url=your_url, | |
44 | ... opener=lambda url, **kw: urlopen(url).read()) | |
45 | >>> d = pq(filename=path_to_html_file) | |
46 | ||
47 | Now d is like the $ in jquery:: | |
48 | ||
49 | >>> d("#hello") | |
50 | [<p#hello.hello>] | |
51 | >>> p = d("#hello") | |
52 | >>> print(p.html()) | |
53 | Hello world ! | |
54 | >>> p.html("you know <a href='http://python.org/'>Python</a> rocks") | |
55 | [<p#hello.hello>] | |
56 | >>> print(p.html()) | |
57 | you know <a href="http://python.org/">Python</a> rocks | |
58 | >>> print(p.text()) | |
59 | you know Python rocks | |
60 | ||
61 | You can use some of the pseudo classes that are available in jQuery but that | |
62 | are not standard in css such as :first :last :even :odd :eq :lt :gt :checked | |
63 | :selected :file:: | |
64 | ||
65 | >>> d('p:first') | |
66 | [<p#hello.hello>] | |
67 | ||
68 | ||
69 | ||
70 | See http://pyquery.rtfd.org/ for the full documentation | |
71 | ||
72 | News | |
73 | ==== | |
74 | ||
75 | 1.2.9 (2014-08-22) | |
76 | ------------------ | |
77 | ||
78 | - Support for keyword arguments in PyQuery custom functions | |
79 | ||
80 | - Fixed #78: items must take care or the parent | |
81 | ||
82 | - Fixed #65 PyQuery.make_links_absolute() no longer creates 'href' attribute | |
83 | when it isn't there | |
84 | ||
85 | - Fixed #19. ``is_()`` was broken. | |
86 | ||
87 | - Fixed #9. ``.replaceWith(PyQuery element)`` raises error | |
88 | ||
89 | - Remove official python3.2 support (mostly because of 3rd party semi-deps) | |
90 | ||
91 | ||
92 | 1.2.8 (2013-12-21) | |
93 | ------------------ | |
94 | ||
95 | - Fixed #22: Open by filename fails when file contains invalid xml | |
96 | ||
97 | - Bug fix in .remove_class() | |
98 | ||
99 | ||
100 | 1.2.7 (2013-12-21) | |
101 | ------------------ | |
102 | ||
103 | - Use pep8 name for methods but keep an alias for camel case method. | |
104 | Eg: remove_attr and removeAttr works | |
105 | Fix #57 | |
106 | ||
107 | - .text() now returns an empty string instead of None if there is no text node. | |
108 | Fix #45 | |
109 | ||
110 | - Fixed #23: removeClass adds class attribute to elements which previously | |
111 | lacked one | |
112 | ||
113 | ||
114 | 1.2.6 (2013-10-11) | |
115 | ------------------ | |
116 | ||
117 | README_fixt.py was not included in the release. Fix #54. | |
118 | ||
119 | ||
120 | 1.2.5 (2013-10-10) | |
121 | ------------------ | |
122 | ||
123 | cssselect compat. See https://github.com/SimonSapin/cssselect/pull/22 | |
124 | ||
125 | tests improvements. no longer require an eth connection. | |
126 | ||
127 | fix #55 | |
128 | ||
129 | 1.2.4 | |
130 | ----- | |
131 | ||
132 | Moved to github. So a few files are renamed from .txt to .rst | |
133 | ||
134 | Added .xhtml_to_html() and .remove_namespaces() | |
135 | ||
136 | Use requests to fetch urls (if available) | |
137 | ||
138 | Use restkit's proxy instead of Paste (which will die with py3) | |
139 | ||
140 | Allow to open https urls | |
141 | ||
142 | python2.5 is no longer supported (may work, but tests are broken) | |
143 | ||
144 | 1.2.3 | |
145 | ----- | |
146 | ||
147 | Allow to pass this in .filter() callback | |
148 | ||
149 | Add .contents() .items() | |
150 | ||
151 | Add tox.ini | |
152 | ||
153 | Bug fixes: fix #35 #55 #64 #66 | |
154 | ||
155 | 1.2.2 | |
156 | ----- | |
157 | ||
158 | Fix cssselectpatch to match the newer implementation of cssselect. Fixes issue #62, #52 and #59 (Haoyu Bai) | |
159 | ||
160 | Fix issue #37 (Caleb Burns) | |
161 | ||
162 | 1.2.1 | |
163 | ----- | |
164 | ||
165 | Allow to use a custom css translator. | |
166 | ||
167 | Fix issue 44: case problem with xml documents | |
168 | ||
169 | 1.2 | |
170 | --- | |
171 | ||
172 | PyQuery now uses `cssselect <http://pypi.python.org/pypi/cssselect>`_. See issue | |
173 | 43. | |
174 | ||
175 | Fix issue 40: forward .html() extra arguments to ``lxml.etree.tostring`` | |
176 | ||
177 | 1.1.1 | |
178 | ----- | |
179 | ||
180 | Minor release. Include test file so you can run tests from the tarball. | |
181 | ||
182 | ||
183 | 1.1 | |
184 | --- | |
185 | ||
186 | fix issues 30, 31, 32 - py3 improvements / webob 1.2+ support | |
187 | ||
188 | ||
189 | 1.0 | |
190 | --- | |
191 | ||
192 | fix issues 24 | |
193 | ||
194 | 0.7 | |
195 | --- | |
196 | ||
197 | Python 3 compatible | |
198 | ||
199 | Add __unicode__ method | |
200 | ||
201 | Add root and encoding attribute | |
202 | ||
203 | fix issues 19, 20, 22, 23 | |
204 | ||
205 | 0.6.1 | |
206 | ------ | |
207 | ||
208 | Move README.txt at package root | |
209 | ||
210 | Add CHANGES.txt and add it to long_description | |
211 | ||
212 | 0.6 | |
213 | ---- | |
214 | ||
215 | Added PyQuery.outerHtml | |
216 | ||
217 | Added PyQuery.fn | |
218 | ||
219 | Added PyQuery.map | |
220 | ||
221 | Change PyQuery.each behavior to reflect jQuery api | |
222 | ||
223 | ||
224 | ||
225 | ||
226 | ||
227 | 10 | Keywords: jquery html xml scraping |
228 | 11 | Platform: UNKNOWN |
229 | 12 | Classifier: Intended Audience :: Developers |
230 | 13 | Classifier: Development Status :: 5 - Production/Stable |
231 | Classifier: Programming Language :: Python :: 2 | |
232 | Classifier: Programming Language :: Python :: 2.6 | |
233 | Classifier: Programming Language :: Python :: 2.7 | |
234 | 14 | Classifier: Programming Language :: Python :: 3 |
235 | Classifier: Programming Language :: Python :: 3.3 | |
236 | Classifier: Programming Language :: Python :: 3.4 | |
15 | Classifier: Programming Language :: Python :: 3.5 | |
16 | Classifier: Programming Language :: Python :: 3.6 | |
17 | Classifier: Programming Language :: Python :: 3.7 | |
18 | Provides-Extra: test | |
19 | License-File: LICENSE.txt | |
20 | ||
21 | ||
22 | pyquery: a jquery-like library for python | |
23 | ========================================= | |
24 | ||
25 | .. image:: https://travis-ci.org/gawel/pyquery.svg | |
26 | :alt: Build Status | |
27 | :target: https://travis-ci.org/gawel/pyquery | |
28 | ||
29 | pyquery allows you to make jquery queries on xml documents. | |
30 | The API is as much as possible similar to jquery. pyquery uses lxml for fast | |
31 | xml and html manipulation. | |
32 | ||
33 | This is not (or at least not yet) a library to produce or interact with | |
34 | javascript code. I just liked the jquery API and I missed it in python so I | |
35 | told myself "Hey let's make jquery in python". This is the result. | |
36 | ||
37 | The `project`_ is being actively developed on a git repository on Github. I | |
38 | have the policy of giving push access to anyone who wants it and then reviewing | |
39 | what they do. So if you want to contribute just email me. | |
40 | ||
41 | Please report bugs on the `github | |
42 | <https://github.com/gawel/pyquery/issues>`_ issue | |
43 | tracker. | |
44 | ||
45 | .. _deliverance: http://www.gawel.org/weblog/en/2008/12/skinning-with-pyquery-and-deliverance | |
46 | .. _project: https://github.com/gawel/pyquery/ | |
47 | ||
48 | I've spent hours maintaining this software, with love. | |
49 | Please consider tipping if you like it: | |
50 | ||
51 | BTC: 1PruQAwByDndFZ7vTeJhyWefAghaZx9RZg | |
52 | ||
53 | ETH: 0xb6418036d8E06c60C4D91c17d72Df6e1e5b15CE6 | |
54 | ||
55 | LTC: LY6CdZcDbxnBX9GFBJ45TqVj8NykBBqsmT | |
56 | ||
57 | .. | |
58 | >>> (urlopen, your_url, path_to_html_file) = getfixture('readme_fixt') | |
59 | ||
60 | Quickstart | |
61 | ========== | |
62 | ||
63 | You can use the PyQuery class to load an xml document from a string, a lxml | |
64 | document, from a file or from an url:: | |
65 | ||
66 | >>> from pyquery import PyQuery as pq | |
67 | >>> from lxml import etree | |
68 | >>> import urllib | |
69 | >>> d = pq("<html></html>") | |
70 | >>> d = pq(etree.fromstring("<html></html>")) | |
71 | >>> d = pq(url=your_url) | |
72 | >>> d = pq(url=your_url, | |
73 | ... opener=lambda url, **kw: urlopen(url).read()) | |
74 | >>> d = pq(filename=path_to_html_file) | |
75 | ||
76 | Now d is like the $ in jquery:: | |
77 | ||
78 | >>> d("#hello") | |
79 | [<p#hello.hello>] | |
80 | >>> p = d("#hello") | |
81 | >>> print(p.html()) | |
82 | Hello world ! | |
83 | >>> p.html("you know <a href='http://python.org/'>Python</a> rocks") | |
84 | [<p#hello.hello>] | |
85 | >>> print(p.html()) | |
86 | you know <a href="http://python.org/">Python</a> rocks | |
87 | >>> print(p.text()) | |
88 | you know Python rocks | |
89 | ||
90 | You can use some of the pseudo classes that are available in jQuery but that | |
91 | are not standard in css such as :first :last :even :odd :eq :lt :gt :checked | |
92 | :selected :file:: | |
93 | ||
94 | >>> d('p:first') | |
95 | [<p#hello.hello>] | |
96 | ||
97 | ||
98 | ||
99 | See http://pyquery.rtfd.org/ for the full documentation | |
100 | ||
101 | News | |
102 | ==== | |
103 | ||
104 | 2.0.0 (unreleased) | |
105 | ------------------ | |
106 | ||
107 | - Breaking change: inputs starting with ``"http://"`` or ``"https://"`` like | |
108 | ``PyQuery("http://example.com")`` will no longer fetch the contents of the URL. | |
109 | Users desiring the old behavior should switch to ``PyQuery(url="http://example.com")``. | |
110 | ||
111 | - Add nextUntil method | |
112 | ||
113 | - ``.remove()`` no longer inserts a space in place of the removed element | |
114 | ||
115 | - Fix escaping of top-level element text in ``.html()`` output | |
116 | ||
117 | ||
118 | 1.4.3 (2020-11-21) | |
119 | ------------------ | |
120 | ||
121 | - No longer use a universal wheel | |
122 | ||
123 | ||
124 | 1.4.2 (2020-11-21) | |
125 | ------------------ | |
126 | ||
127 | - Fix exception raised when calling `PyQuery("<textarea></textarea>").text()` | |
128 | ||
129 | - python2 is no longer supported | |
130 | ||
131 | 1.4.1 (2019-10-26) | |
132 | ------------------ | |
133 | ||
134 | - This is the latest release with py2 support | |
135 | ||
136 | - Remove py33, py34 support | |
137 | ||
138 | - web scraping improvements: default timeout and session support | |
139 | ||
140 | - Add API methods to serialize form-related elements according to spec | |
141 | ||
142 | - Include HTML markup when querying textarea text/value | |
143 | ||
144 | ||
145 | 1.4.0 (2018-01-11) | |
146 | ------------------ | |
147 | ||
148 | - Refactoring of `.text()` to match firefox behavior. | |
149 | ||
150 | ||
151 | 1.3.0 (2017-10-21) | |
152 | ------------------ | |
153 | ||
154 | - Remove some unmaintained modules: ``pyquery.ajax`` and ``pyquery.rules`` | |
155 | ||
156 | - Code cleanup. No longer use ugly hacks required by python2.6/python3.2. | |
157 | ||
158 | - Run tests with python3.6 on CI | |
159 | ||
160 | - Add a ``method`` argument to ``.outer_html()`` | |
161 | ||
162 | ||
163 | 1.2.17 (2016-10-14) | |
164 | ------------------- | |
165 | ||
166 | - ``PyQuery('<input value="">').val()`` is ``''`` | |
167 | - ``PyQuery('<input>').val()`` is ``''`` | |
168 | ||
169 | ||
170 | 1.2.16 (2016-10-14) | |
171 | ------------------- | |
172 | ||
173 | - ``.attr('value', '')`` no longer removes the ``value`` attribute | |
174 | ||
175 | - ``<input type="checkbox">`` without ``value="..."`` have a ``.val()`` of | |
176 | ``'on'`` | |
177 | ||
178 | - ``<input type="radio">`` without ``value="..."`` have a ``.val()`` of | |
179 | ``'on'`` | |
180 | ||
181 | - ``<select>`` without ``<option selected>`` have the value of their first | |
182 | ``<option>`` (or ``None`` if there are no options) | |
183 | ||
184 | ||
185 | 1.2.15 (2016-10-11) | |
186 | ------------------- | |
187 | ||
188 | - .val() should never raise | |
189 | ||
190 | - drop py26 support | |
191 | ||
192 | - improve .extend() by returning self | |
193 | ||
194 | ||
195 | 1.2.14 (2016-10-10) | |
196 | ------------------- | |
197 | ||
198 | - fix val() for <textarea> and <select>, to match jQuery behavior | |
199 | ||
200 | ||
201 | 1.2.13 (2016-04-12) | |
202 | ------------------- | |
203 | ||
204 | - Note explicit support for Python 3.5 | |
205 | ||
206 | 1.2.12 (2016-04-12) | |
207 | ------------------- | |
208 | ||
209 | - make_links_absolute now takes care of whitespace | |
210 | ||
211 | - added pseudo selector :has() | |
212 | ||
213 | - add cookies arguments as allowed arguments for requests | |
214 | ||
215 | ||
216 | 1.2.11 (2016-02-02) | |
217 | ------------------- | |
218 | ||
219 | - Preserve namespaces attribute on PyQuery copies. | |
220 | ||
221 | - Do not raise an error when the http response code is 2XX | |
222 | ||
223 | 1.2.10 (2016-01-05) | |
224 | ------------------- | |
225 | ||
226 | - Fixed #118: implemented usage ``lxml.etree.tostring`` within ``outer_html`` method | |
227 | ||
228 | - Fixed #117: Raise HTTP Error if HTTP status code is not equal to 200 | |
229 | ||
230 | - Fixed #112: make_links_absolute does not apply to form actions | |
231 | ||
232 | - Fixed #98: contains act like jQuery | |
233 | ||
234 | ||
235 | 1.2.9 (2014-08-22) | |
236 | ------------------ | |
237 | ||
238 | - Support for keyword arguments in PyQuery custom functions | |
239 | ||
240 | - Fixed #78: items must take care of the parent | |
241 | ||
242 | - Fixed #65 PyQuery.make_links_absolute() no longer creates 'href' attribute | |
243 | when it isn't there | |
244 | ||
245 | - Fixed #19. ``is_()`` was broken. | |
246 | ||
247 | - Fixed #9. ``.replaceWith(PyQuery element)`` raises error | |
248 | ||
249 | - Remove official python3.2 support (mostly because of 3rd party semi-deps) | |
250 | ||
251 | ||
252 | 1.2.8 (2013-12-21) | |
253 | ------------------ | |
254 | ||
255 | - Fixed #22: Open by filename fails when file contains invalid xml | |
256 | ||
257 | - Bug fix in .remove_class() | |
258 | ||
259 | ||
260 | 1.2.7 (2013-12-21) | |
261 | ------------------ | |
262 | ||
263 | - Use pep8 name for methods but keep an alias for camel case method. | |
264 | Eg: remove_attr and removeAttr works | |
265 | Fix #57 | |
266 | ||
267 | - .text() now returns an empty string instead of None if there is no text node. | |
268 | Fix #45 | |
269 | ||
270 | - Fixed #23: removeClass adds class attribute to elements which previously | |
271 | lacked one | |
272 | ||
273 | ||
274 | 1.2.6 (2013-10-11) | |
275 | ------------------ | |
276 | ||
277 | - README_fixt.py was not included in the release. Fix #54. | |
278 | ||
279 | ||
280 | 1.2.5 (2013-10-10) | |
281 | ------------------ | |
282 | ||
283 | - cssselect compat. See https://github.com/SimonSapin/cssselect/pull/22 | |
284 | ||
285 | - tests improvements. no longer require an eth connection. | |
286 | ||
287 | - fix #55 | |
288 | ||
289 | 1.2.4 | |
290 | ----- | |
291 | ||
292 | - Moved to github. So a few files are renamed from .txt to .rst | |
293 | ||
294 | - Added .xhtml_to_html() and .remove_namespaces() | |
295 | ||
296 | - Use requests to fetch urls (if available) | |
297 | ||
298 | - Use restkit's proxy instead of Paste (which will die with py3) | |
299 | ||
300 | - Allow to open https urls | |
301 | ||
302 | - python2.5 is no longer supported (may work, but tests are broken) | |
303 | ||
304 | 1.2.3 | |
305 | ----- | |
306 | ||
307 | - Allow to pass this in .filter() callback | |
308 | ||
309 | - Add .contents() .items() | |
310 | ||
311 | - Add tox.ini | |
312 | ||
313 | - Bug fixes: fix #35 #55 #64 #66 | |
314 | ||
315 | 1.2.2 | |
316 | ----- | |
317 | ||
318 | - Fix cssselectpatch to match the newer implementation of cssselect. Fixes issue #62, #52 and #59 (Haoyu Bai) | |
319 | ||
320 | - Fix issue #37 (Caleb Burns) | |
321 | ||
322 | 1.2.1 | |
323 | ----- | |
324 | ||
325 | - Allow to use a custom css translator. | |
326 | ||
327 | - Fix issue 44: case problem with xml documents | |
328 | ||
329 | 1.2 | |
330 | --- | |
331 | ||
332 | - PyQuery now uses `cssselect <http://pypi.python.org/pypi/cssselect>`_. See issue 43. | |
333 | ||
334 | - Fix issue 40: forward .html() extra arguments to ``lxml.etree.tostring`` | |
335 | ||
336 | 1.1.1 | |
337 | ----- | |
338 | ||
339 | - Minor release. Include test file so you can run tests from the tarball. | |
340 | ||
341 | ||
342 | 1.1 | |
343 | --- | |
344 | ||
345 | - fix issues 30, 31, 32 - py3 improvements / webob 1.2+ support | |
346 | ||
347 | ||
348 | 1.0 | |
349 | --- | |
350 | ||
351 | - fix issues 24 | |
352 | ||
353 | 0.7 | |
354 | --- | |
355 | ||
356 | - Python 3 compatible | |
357 | ||
358 | - Add __unicode__ method | |
359 | ||
360 | - Add root and encoding attribute | |
361 | ||
362 | - fix issues 19, 20, 22, 23 | |
363 | ||
364 | 0.6.1 | |
365 | ------ | |
366 | ||
367 | - Move README.txt at package root | |
368 | ||
369 | - Add CHANGES.txt and add it to long_description | |
370 | ||
371 | 0.6 | |
372 | ---- | |
373 | ||
374 | - Added PyQuery.outerHtml | |
375 | ||
376 | - Added PyQuery.fn | |
377 | ||
378 | - Added PyQuery.map | |
379 | ||
380 | - Change PyQuery.each behavior to reflect jQuery api | |
381 | ||
382 | ||
383 | ||
384 | ||
385 | ||
386 |
0 | 0 | CHANGES.rst |
1 | LICENSE.txt | |
1 | 2 | MANIFEST.in |
2 | 3 | README.rst |
3 | 4 | README_fixt.py |
4 | buildout.cfg | |
5 | conftest.py | |
6 | pytest.ini | |
5 | 7 | setup.cfg |
6 | 8 | setup.py |
7 | 9 | tox.ini |
8 | 10 | docs/Makefile |
9 | docs/ajax.rst | |
10 | docs/ajax_fixt.py | |
11 | 11 | docs/api.rst |
12 | 12 | docs/attributes.rst |
13 | 13 | docs/changes.rst |
14 | 14 | docs/conf.py |
15 | docs/conftest.py | |
15 | 16 | docs/css.rst |
16 | 17 | docs/future.rst |
17 | 18 | docs/index.rst |
18 | 19 | docs/manipulating.rst |
19 | 20 | docs/pseudo_classes.rst |
20 | 21 | docs/scrap.rst |
21 | docs/scrap_fixt.py | |
22 | 22 | docs/testing.rst |
23 | 23 | docs/tips.rst |
24 | docs/tips_fixt.py | |
25 | 24 | docs/traversing.rst |
26 | 25 | pyquery/__init__.py |
27 | pyquery/ajax.py | |
28 | 26 | pyquery/cssselectpatch.py |
29 | 27 | pyquery/openers.py |
30 | 28 | pyquery/pyquery.py |
31 | pyquery/rules.py | |
29 | pyquery/text.py | |
32 | 30 | pyquery.egg-info/PKG-INFO |
33 | 31 | pyquery.egg-info/SOURCES.txt |
34 | 32 | pyquery.egg-info/dependency_links.txt |
38 | 36 | pyquery.egg-info/top_level.txt |
39 | 37 | tests/__init__.py |
40 | 38 | tests/apps.py |
41 | tests/compat.py | |
39 | tests/browser_base.py | |
42 | 40 | tests/doctests.rst |
41 | tests/geckodriver.sh | |
43 | 42 | tests/invalid.xml |
43 | tests/selenium.sh | |
44 | 44 | tests/test.html |
45 | tests/test_pyquery.py⏎ | |
45 | tests/test_browser.py | |
46 | tests/test_pyquery.py | |
47 | tests/test_real_browser.py⏎ |
0 | cssselect>0.7.9 | |
0 | 1 | lxml>=2.1 |
1 | cssselect | |
2 | ||
3 | [test] | |
4 | pytest | |
5 | pytest-cov | |
6 | requests | |
7 | webob | |
8 | webtest |
0 | ||
1 | [pytest] | |
2 | filterwarnings = | |
3 | ignore::DeprecationWarning | |
4 | doctest_optionflags = ELLIPSIS NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL | |
5 | addopts = --doctest-modules --doctest-glob="*.rst" --ignore=docs/conf.py |
4 | 4 | doctest-extension = rst |
5 | 5 | doctest-fixtures = _fixt |
6 | 6 | include = docs |
7 | exclude = seleniumtests | |
7 | 8 | cover-package = pyquery |
8 | 9 | with-coverage = 1 |
9 | 10 | doctest-options = +ELLIPSIS,+NORMALIZE_WHITESPACE |
11 | 12 | [egg_info] |
12 | 13 | tag_build = |
13 | 14 | tag_date = 0 |
14 | tag_svn_revision = 0 | |
15 | 15 |
0 | #-*- coding:utf-8 -*- | |
0 | # -*- coding:utf-8 -*- | |
1 | 1 | # |
2 | 2 | # Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com> |
3 | 3 | # |
5 | 5 | |
6 | 6 | from setuptools import setup, find_packages |
7 | 7 | import os |
8 | ||
9 | ||
10 | install_requires = [ | |
11 | 'lxml>=2.1', | |
12 | 'cssselect>0.7.9', | |
13 | ] | |
8 | 14 | |
9 | 15 | |
10 | 16 | def read(*names): |
33 | 39 | |
34 | 40 | """ % read('README', 'CHANGES') |
35 | 41 | |
36 | version = '1.2.9' | |
42 | version = '2.0.0.dev0' | |
37 | 43 | |
38 | 44 | setup(name='pyquery', |
39 | 45 | version=version, |
42 | 48 | classifiers=[ |
43 | 49 | "Intended Audience :: Developers", |
44 | 50 | "Development Status :: 5 - Production/Stable", |
45 | "Programming Language :: Python :: 2", | |
46 | "Programming Language :: Python :: 2.6", | |
47 | "Programming Language :: Python :: 2.7", | |
48 | 51 | "Programming Language :: Python :: 3", |
49 | "Programming Language :: Python :: 3.3", | |
50 | "Programming Language :: Python :: 3.4", | |
52 | "Programming Language :: Python :: 3.5", | |
53 | "Programming Language :: Python :: 3.6", | |
54 | "Programming Language :: Python :: 3.7", | |
51 | 55 | ], |
52 | 56 | keywords='jquery html xml scraping', |
53 | 57 | author='Olivier Lauzanne', |
59 | 63 | packages=find_packages(exclude=[ |
60 | 64 | 'bootstrap', 'bootstrap-py3k', 'docs', 'tests', 'README_fixt' |
61 | 65 | ]), |
66 | extras_require={ | |
67 | 'test': ['requests', 'webob', 'webtest', 'pytest', 'pytest-cov'], | |
68 | }, | |
62 | 69 | include_package_data=True, |
63 | 70 | zip_safe=False, |
64 | install_requires=[ | |
65 | 'lxml>=2.1', | |
66 | 'cssselect', | |
67 | ], | |
71 | install_requires=install_requires, | |
68 | 72 | entry_points=""" |
69 | 73 | # -*- Entry points: -*- |
70 | 74 | """, |
1 | 1 | from webob import Request |
2 | 2 | from webob import Response |
3 | 3 | from webob import exc |
4 | from .compat import b | |
5 | 4 | |
6 | 5 | |
7 | 6 | def input_app(environ, start_response): |
8 | 7 | resp = Response() |
9 | 8 | req = Request(environ) |
10 | 9 | if req.path_info == '/': |
11 | resp.body = b('<input name="youyou" type="text" value="" />') | |
10 | resp.text = '<input name="youyou" type="text" value="" />' | |
12 | 11 | elif req.path_info == '/submit': |
13 | resp.body = b('<input type="submit" value="OK" />') | |
12 | resp.text = '<input type="submit" value="OK" />' | |
14 | 13 | elif req.path_info.startswith('/html'): |
15 | resp.body = b('<html><p>Success</p></html>') | |
14 | resp.text = '<html><p>Success</p></html>' | |
16 | 15 | else: |
17 | resp.body = '' | |
16 | resp.text = '<html></html>' | |
18 | 17 | return resp(environ, start_response) |
19 | 18 | |
20 | 19 | |
22 | 21 | req = Request(environ) |
23 | 22 | response = Response() |
24 | 23 | if req.method == 'GET': |
25 | response.body = b('<pre>Yeah !</pre>') | |
24 | response.text = '<pre>Yeah !</pre>' | |
26 | 25 | else: |
27 | response.body = b('<a href="/plop">Yeah !</a>') | |
26 | response.text = '<a href="/plop">Yeah !</a>' | |
28 | 27 | return response(environ, start_response) |
29 | 28 | |
30 | 29 |
0 | ||
1 | class TextExtractionMixin(): | |
2 | def _prepare_dom(self, html): | |
3 | self.last_html = '<html><body>' + html + '</body></html>' | |
4 | ||
5 | def _simple_test(self, html, expected_sq, expected_nosq, **kwargs): | |
6 | raise NotImplementedError | |
7 | ||
8 | def test_inline_tags(self): | |
9 | self._simple_test( | |
10 | 'Phas<em>ell</em>us<i> eget </i>sem <b>facilisis</b> justo', | |
11 | 'Phasellus eget sem facilisis justo', | |
12 | 'Phasellus eget sem facilisis justo', | |
13 | ) | |
14 | self._simple_test( | |
15 | 'Phasellus <span> eget </span> sem <b>facilisis\n</b> justo', | |
16 | 'Phasellus eget sem facilisis justo', | |
17 | 'Phasellus eget sem facilisis\n justo', | |
18 | ) | |
19 | self._simple_test( | |
20 | ('Phasellus <span>\n eget\n ' | |
21 | 'sem\n\tfacilisis</span> justo'), | |
22 | 'Phasellus eget sem facilisis justo', | |
23 | 'Phasellus \n eget\n sem\n\tfacilisis justo' | |
24 | ) | |
25 | ||
26 | def test_block_tags(self): | |
27 | self._simple_test( | |
28 | 'Phas<p>ell</p>us<div> eget </div>sem <h1>facilisis</h1> justo', | |
29 | 'Phas\nell\nus\neget\nsem\nfacilisis\njusto', | |
30 | 'Phas\nell\nus\n eget \nsem \nfacilisis\n justo', | |
31 | ) | |
32 | self._simple_test( | |
33 | '<p>In sagittis</p> <p>rutrum</p><p>condimentum</p>', | |
34 | 'In sagittis\nrutrum\ncondimentum', | |
35 | 'In sagittis\n \nrutrum\n\ncondimentum', | |
36 | ) | |
37 | self._simple_test( | |
38 | 'In <p>\nultricies</p>\n erat et <p>\n\n\nmaximus\n\n</p> mollis', | |
39 | 'In\nultricies\nerat et\nmaximus\nmollis', | |
40 | 'In \n\nultricies\n\n erat et \n\n\n\nmaximus\n\n\n mollis', | |
41 | ) | |
42 | self._simple_test( | |
43 | ('Integer <div><div>\n <div>quis commodo</div></div> ' | |
44 | '</div> libero'), | |
45 | 'Integer\nquis commodo\nlibero', | |
46 | 'Integer \n\n\n \nquis commodo\n\n \n libero', | |
47 | ) | |
48 | self._simple_test( | |
49 | 'Heading<ul><li>one</li><li>two</li><li>three</li></ul>', | |
50 | 'Heading\none\ntwo\nthree', | |
51 | 'Heading\n\none\n\ntwo\n\nthree', | |
52 | ) | |
53 | ||
54 | def test_separators(self): | |
55 | self._simple_test( | |
56 | 'Some words<br>test. Another word<br><br> <br> test.', | |
57 | 'Some words\ntest. Another word\n\n\ntest.', | |
58 | 'Some words\ntest. Another word\n\n \n test.', | |
59 | ) | |
60 | self._simple_test( | |
61 | 'Inline <span> splitted by\nbr<br>tag</span> test', | |
62 | 'Inline splitted by br\ntag test', | |
63 | 'Inline splitted by\nbr\ntag test', | |
64 | ) | |
65 | self._simple_test( | |
66 | 'Some words<hr>test. Another word<hr><hr> <hr> test.', | |
67 | 'Some words\ntest. Another word\ntest.', | |
68 | 'Some words\n\ntest. Another word\n\n\n\n \n\n test.', | |
69 | ) | |
70 | ||
71 | def test_strip(self): | |
72 | self._simple_test( | |
73 | ' text\n', | |
74 | 'text', | |
75 | ' text\n', | |
76 | ) | |
77 | ||
78 | def test_ul_li(self): | |
79 | self._simple_test( | |
80 | '<ul> <li> </li> </ul>', | |
81 | '', | |
82 | ' \n \n ' | |
83 | ) |
0 | # -*- coding: utf-8 -*- | |
1 | import sys | |
2 | ||
3 | PY3k = sys.version_info >= (3,) | |
4 | ||
5 | if PY3k: | |
6 | text_type = str | |
7 | ||
8 | def u(value, encoding): | |
9 | return str(value) | |
10 | ||
11 | def b(value): | |
12 | return value.encode('utf-8') | |
13 | else: | |
14 | text_type = unicode | |
15 | ||
16 | def u(value, encoding): # NOQA | |
17 | return unicode(value, encoding) | |
18 | ||
19 | def b(value): # NOQA | |
20 | return str(value) | |
21 | ||
22 | try: | |
23 | from unittest2 import TestCase | |
24 | except ImportError: | |
25 | from unittest import TestCase # NOQA |
0 | #!/bin/bash | |
1 | ||
2 | driver="https://github.com/mozilla/geckodriver/releases/download/v0.26.0/geckodriver-v0.26.0-linux64.tar.gz" | |
3 | ||
4 | [ -f geckodriver ] || wget -cqO- $driver | tar xvzf - |
0 | #!/bin/bash | |
1 | # script to run selenium tests | |
2 | ||
3 | # get geckodriver | |
4 | ./tests/geckodriver.sh | |
5 | ||
6 | # run tox with py3.7 | |
7 | MOZ_HEADLESS=1 PATH=$PATH:$PWD tox -e py37 tests/test_real_browser.py |
0 | import unittest | |
1 | ||
2 | from pyquery.pyquery import PyQuery | |
3 | from .browser_base import TextExtractionMixin | |
4 | ||
5 | ||
6 | class TestInnerText(unittest.TestCase, TextExtractionMixin): | |
7 | def _prepare_dom(self, html): | |
8 | super(TestInnerText, self)._prepare_dom(html) | |
9 | self.pq = PyQuery(self.last_html) | |
10 | ||
11 | def _simple_test(self, html, expected_sq, expected_nosq, **kwargs): | |
12 | self._prepare_dom(html) | |
13 | text_sq = self.pq.text(squash_space=True, **kwargs) | |
14 | text_nosq = self.pq.text(squash_space=False, **kwargs) | |
15 | self.assertEqual(text_sq, expected_sq) | |
16 | self.assertEqual(text_nosq, expected_nosq) |
0 | #-*- coding:utf-8 -*- | |
1 | # | |
2 | 0 | # Copyright (C) 2008 - Olivier Lauzanne <olauzanne@gmail.com> |
3 | 1 | # |
4 | 2 | # Distributed under the BSD license, see LICENSE.txt |
5 | 3 | import os |
6 | 4 | import sys |
7 | sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) | |
5 | import time | |
8 | 6 | from lxml import etree |
9 | from pyquery.pyquery import PyQuery as pq | |
10 | from pyquery.ajax import PyQuery as pqa | |
7 | from pyquery.pyquery import PyQuery as pq, no_default | |
8 | from pyquery.openers import HAS_REQUEST | |
11 | 9 | from webtest import http |
12 | 10 | from webtest.debugapp import debug_app |
13 | from .apps import application | |
14 | from .apps import secure_application | |
15 | from .compat import PY3k | |
16 | from .compat import u | |
17 | from .compat import b | |
18 | from .compat import text_type | |
19 | from .compat import TestCase | |
20 | ||
21 | ||
22 | def not_py3k(func): | |
23 | if not PY3k: | |
24 | return func | |
25 | ||
26 | try: | |
27 | import requests # NOQA | |
28 | HAS_REQUEST = True | |
29 | except ImportError: | |
30 | HAS_REQUEST = False | |
11 | from unittest import TestCase | |
12 | ||
13 | sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) | |
31 | 14 | |
32 | 15 | |
33 | 16 | dirname = os.path.dirname(os.path.abspath(__file__)) |
39 | 22 | class TestUnicode(TestCase): |
40 | 23 | |
41 | 24 | def test_unicode(self): |
42 | xml = pq(u("<html><p>é</p></html>", 'utf-8')) | |
43 | self.assertEqual(type(xml.html()), text_type) | |
44 | if PY3k: | |
45 | self.assertEqual(str(xml), '<html><p>é</p></html>') | |
46 | self.assertEqual(str(xml('p:contains("é")')), '<p>é</p>') | |
47 | else: | |
48 | self.assertEqual(unicode(xml), u("<html><p>é</p></html>", 'utf-8')) | |
49 | self.assertEqual(str(xml), '<html><p>é</p></html>') | |
50 | self.assertEqual(str(xml(u('p:contains("é")', 'utf8'))), | |
51 | '<p>é</p>') | |
52 | self.assertEqual(unicode(xml(u('p:contains("é")', 'utf8'))), | |
53 | u('<p>é</p>', 'utf8')) | |
25 | xml = pq(u"<html><p>é</p></html>") | |
26 | self.assertEqual(type(xml.html()), str) | |
27 | self.assertEqual(str(xml), '<html><p>é</p></html>') | |
28 | self.assertEqual(str(xml('p:contains("é")')), '<p>é</p>') | |
54 | 29 | |
55 | 30 | |
56 | 31 | class TestAttributeCase(TestCase): |
101 | 76 | <body> |
102 | 77 | <form action="/"> |
103 | 78 | <input name="enabled" type="text" value="test"/> |
79 | <b disabled>Not :disabled</b> | |
104 | 80 | <input name="disabled" type="text" |
105 | 81 | value="disabled" disabled="disabled"/> |
82 | <fieldset> | |
83 | <input name="fieldset-enabled"> | |
84 | </fieldset> | |
85 | <fieldset disabled> | |
86 | <legend> | |
87 | <input name="legend-enabled"> | |
88 | </legend> | |
89 | <input name="fieldset-disabled"> | |
90 | <legend> | |
91 | <input name="legend-disabled"> | |
92 | </legend> | |
93 | <select id="disabled-select"> | |
94 | <optgroup> | |
95 | <option></option> | |
96 | </optgroup> | |
97 | </select> | |
98 | </fieldset> | |
99 | <select> | |
100 | <optgroup id="disabled-optgroup" disabled> | |
101 | <option id="disabled-from-optgroup"></option> | |
102 | <option id="disabled-option" disabled></option> | |
103 | </optgroup> | |
104 | </select> | |
106 | 105 | <input name="file" type="file" /> |
107 | 106 | <select name="select"> |
108 | 107 | <option value="">Choose something</option> |
134 | 133 | <h4>Heading 4</h4> |
135 | 134 | <h5>Heading 5</h5> |
136 | 135 | <h6>Heading 6</h6> |
136 | <div></div> | |
137 | 137 | </body> |
138 | 138 | </html> |
139 | 139 | """ |
140 | 140 | |
141 | 141 | def test_get_root(self): |
142 | doc = pq(b('<?xml version="1.0" encoding="UTF-8"?><root><p/></root>')) | |
142 | doc = pq(b'<?xml version="1.0" encoding="UTF-8"?><root><p/></root>') | |
143 | 143 | self.assertEqual(isinstance(doc.root, etree._ElementTree), True) |
144 | 144 | self.assertEqual(doc.encoding, 'UTF-8') |
145 | ||
146 | child = doc.children().eq(0) | |
147 | self.assertNotEqual(child._parent, no_default) | |
148 | self.assertTrue(isinstance(child.root, etree._ElementTree)) | |
145 | 149 | |
146 | 150 | def test_selector_from_doc(self): |
147 | 151 | doc = etree.fromstring(self.html) |
182 | 186 | self.assertEqual(e('div:lt(1)').text(), 'node1') |
183 | 187 | self.assertEqual(e('div:eq(2)').text(), 'node3') |
184 | 188 | |
185 | #test on the form | |
189 | # test on the form | |
186 | 190 | e = self.klass(self.html4) |
187 | assert len(e(':disabled')) == 1 | |
188 | assert len(e('input:enabled')) == 9 | |
191 | disabled = e(':disabled') | |
192 | self.assertIn(e('[name="disabled"]')[0], disabled) | |
193 | self.assertIn(e('fieldset[disabled]')[0], disabled) | |
194 | self.assertIn(e('[name="legend-disabled"]')[0], disabled) | |
195 | self.assertIn(e('[name="fieldset-disabled"]')[0], disabled) | |
196 | self.assertIn(e('#disabled-optgroup')[0], disabled) | |
197 | self.assertIn(e('#disabled-from-optgroup')[0], disabled) | |
198 | self.assertIn(e('#disabled-option')[0], disabled) | |
199 | self.assertIn(e('#disabled-select')[0], disabled) | |
200 | ||
201 | assert len(disabled) == 8 | |
202 | assert len(e('select:enabled')) == 2 | |
203 | assert len(e('input:enabled')) == 11 | |
189 | 204 | assert len(e(':selected')) == 1 |
190 | 205 | assert len(e(':checked')) == 2 |
191 | 206 | assert len(e(':file')) == 1 |
192 | assert len(e(':input')) == 12 | |
207 | assert len(e(':input')) == 18 | |
193 | 208 | assert len(e(':button')) == 2 |
194 | 209 | assert len(e(':radio')) == 3 |
195 | 210 | assert len(e(':checkbox')) == 3 |
196 | 211 | |
197 | #test on other elements | |
212 | # test on other elements | |
198 | 213 | e = self.klass(self.html5) |
199 | 214 | assert len(e(":header")) == 6 |
200 | 215 | assert len(e(":parent")) == 2 |
201 | assert len(e(":empty")) == 6 | |
202 | assert len(e(":contains('Heading')")) == 6 | |
216 | assert len(e(":empty")) == 1 | |
217 | assert len(e(":contains('Heading')")) == 8 | |
203 | 218 | |
204 | 219 | def test_on_the_fly_dom_creation(self): |
205 | 220 | e = self.klass(self.html) |
219 | 234 | </html> |
220 | 235 | """ |
221 | 236 | |
237 | html2 = """ | |
238 | <html> | |
239 | <body> | |
240 | <dl> | |
241 | <dt id="term-1">term 1</dt> | |
242 | <dd>definition 1-a</dd> | |
243 | <dd>definition 1-b</dd> | |
244 | <dd>definition 1-c</dd> | |
245 | <dd>definition 1-d</dd> | |
246 | <dt id="term-2">term 2</dt> | |
247 | <dd>definition 2-a</dd> | |
248 | <dd class="strange">definition 2-b</dd> | |
249 | <dd>definition 2-c</dd> | |
250 | <dt id="term-3">term 3</dt> | |
251 | <dd>definition 3-a</dd> | |
252 | <dd>definition 3-b</dd> | |
253 | </dl> | |
254 | </body> | |
255 | </html> | |
256 | """ | |
257 | ||
222 | 258 | def test_filter(self): |
223 | 259 | assert len(self.klass('div', self.html).filter('.node3')) == 1 |
224 | 260 | assert len(self.klass('div', self.html).filter('#node2')) == 1 |
262 | 298 | self.html).closest('.node3').attr('id') == 'node2' |
263 | 299 | assert self.klass('.node3', self.html).closest('form') == [] |
264 | 300 | |
301 | def test_next_all(self): | |
302 | d = pq(self.html2) | |
303 | ||
304 | # without filter | |
305 | self.assertEqual( | |
306 | len(d('#term-2').next_all()), 6) | |
307 | # with filter | |
308 | self.assertEqual( | |
309 | len(d('#term-2').next_all('dd')), 5) | |
310 | # when empty | |
311 | self.assertEqual( | |
312 | d('#NOTHING').next_all(), []) | |
313 | ||
314 | def test_next_until(self): | |
315 | d = pq(self.html2) | |
316 | ||
317 | # without filter | |
318 | self.assertEqual( | |
319 | len(d('#term-2').next_until('dt')), 3) | |
320 | # with filter | |
321 | self.assertEqual( | |
322 | len(d('#term-2').next_until('dt', ':not(.strange)')), 2) | |
323 | # when empty | |
324 | self.assertEqual( | |
325 | d('#NOTHING').next_until('*'), []) | |
326 | ||
265 | 327 | |
266 | 328 | class TestOpener(TestCase): |
267 | 329 | |
279 | 341 | |
280 | 342 | doc = pq(url='http://example.com', opener=opener) |
281 | 343 | assert len(doc('.node')) == 1, doc |
344 | ||
345 | ||
346 | class TestConstruction(TestCase): | |
347 | ||
348 | def test_typeerror_on_invalid_value(self): | |
349 | self.assertRaises(TypeError, pq, object()) | |
282 | 350 | |
283 | 351 | |
284 | 352 | class TestComment(TestCase): |
299 | 367 | |
300 | 368 | def test_S_this_inside_callback(self): |
301 | 369 | S = pq(self.html) |
302 | self.assertEqual(S('li').map(lambda i, el: S(this).html()), # NOQA | |
303 | ['Coffee', 'Tea', 'Milk']) | |
370 | self.assertEqual(S('li').map( | |
371 | lambda i, el: S(this).html()), # NOQA | |
372 | ['Coffee', 'Tea', 'Milk'] | |
373 | ) | |
304 | 374 | |
305 | 375 | def test_parameterless_callback(self): |
306 | 376 | S = pq(self.html) |
307 | self.assertEqual(S('li').map(lambda: S(this).html()), # NOQA | |
308 | ['Coffee', 'Tea', 'Milk']) | |
377 | self.assertEqual(S('li').map( | |
378 | lambda: S(this).html()), # NOQA | |
379 | ['Coffee', 'Tea', 'Milk'] | |
380 | ) | |
309 | 381 | |
310 | 382 | |
311 | 383 | class TestHook(TestCase): |
319 | 391 | |
320 | 392 | def test_fn(self): |
321 | 393 | "Example from `PyQuery.Fn` docs." |
322 | fn = lambda: this.map(lambda i, el: pq(this).outerHtml()) | |
394 | fn = lambda: this.map(lambda i, el: pq(this).outerHtml()) # NOQA | |
323 | 395 | pq.fn.listOuterHtml = fn |
324 | 396 | S = pq(self.html) |
325 | 397 | self.assertEqual(S('li').listOuterHtml(), |
327 | 399 | |
328 | 400 | def test_fn_with_kwargs(self): |
329 | 401 | "fn() with keyword arguments." |
330 | pq.fn.test = lambda p=1: pq(this).eq(p) | |
402 | pq.fn.test = lambda p=1: pq(this).eq(p) # NOQA | |
331 | 403 | S = pq(self.html) |
332 | 404 | self.assertEqual(S('li').test(0).text(), 'Coffee') |
333 | 405 | self.assertEqual(S('li').test().text(), 'Tea') |
334 | 406 | self.assertEqual(S('li').test(p=2).text(), 'Milk') |
335 | ||
336 | ||
337 | class TestAjaxSelector(TestSelector): | |
338 | klass = pqa | |
339 | ||
340 | def setUp(self): | |
341 | self.s = http.StopableWSGIServer.create(application) | |
342 | ||
343 | @not_py3k | |
344 | def test_proxy(self): | |
345 | self.s.wait() | |
346 | application_url = self.s.application_url | |
347 | e = self.klass([]) | |
348 | val = e.get(application_url) | |
349 | assert len(val('pre')) == 1, (str(val.response), val) | |
350 | ||
351 | def test_get(self): | |
352 | e = self.klass(app=application) | |
353 | val = e.get('/') | |
354 | assert len(val('pre')) == 1, val | |
355 | ||
356 | def test_secure_get(self): | |
357 | e = self.klass(app=secure_application) | |
358 | val = e.get('/', environ=dict(REMOTE_USER='gawii')) | |
359 | assert len(val('pre')) == 1, val | |
360 | val = e.get('/', REMOTE_USER='gawii') | |
361 | assert len(val('pre')) == 1, val | |
362 | ||
363 | def test_secure_get_not_authorized(self): | |
364 | e = self.klass(app=secure_application) | |
365 | val = e.get('/') | |
366 | assert len(val('pre')) == 0, val | |
367 | ||
368 | def test_post(self): | |
369 | e = self.klass(app=application) | |
370 | val = e.post('/') | |
371 | assert len(val('a')) == 1, val | |
372 | ||
373 | def test_subquery(self): | |
374 | e = self.klass(app=application) | |
375 | n = e('div') | |
376 | val = n.post('/') | |
377 | assert len(val('a')) == 1, val | |
378 | ||
379 | def tearDown(self): | |
380 | self.s.shutdown() | |
381 | 407 | |
382 | 408 | |
383 | 409 | class TestManipulating(TestCase): |
388 | 414 | </div> |
389 | 415 | ''' |
390 | 416 | |
417 | html2 = ''' | |
418 | <input name="spam" value="Spam"> | |
419 | <input name="eggs" value="Eggs"> | |
420 | <input type="checkbox" value="Bacon"> | |
421 | <input type="radio" value="Ham"> | |
422 | ''' | |
423 | ||
424 | html2_newline = ''' | |
425 | <input id="newline-text" type="text" name="order" value="S | |
426 | pam"> | |
427 | <input id="newline-radio" type="radio" name="order" value="S | |
428 | pam"> | |
429 | ''' | |
430 | ||
431 | html3 = ''' | |
432 | <textarea id="textarea-single">Spam</textarea> | |
433 | <textarea id="textarea-multi">Spam | |
434 | <b>Eggs</b> | |
435 | Bacon</textarea> | |
436 | ''' | |
437 | ||
438 | html4 = ''' | |
439 | <select id="first"> | |
440 | <option value="spam">Spam</option> | |
441 | <option value="eggs">Eggs</option> | |
442 | </select> | |
443 | <select id="second"> | |
444 | <option value="spam">Spam</option> | |
445 | <option value="eggs" selected>Eggs</option> | |
446 | <option value="bacon">Bacon</option> | |
447 | </select> | |
448 | <select id="third"> | |
449 | </select> | |
450 | <select id="fourth"> | |
451 | <option value="spam">Spam</option> | |
452 | <option value="spam">Eggs</option> | |
453 | <option value="spam">Bacon</option> | |
454 | </select> | |
455 | ''' | |
456 | ||
457 | html6 = ''' | |
458 | <select id="first" multiple> | |
459 | <option value="spam" selected>Spam</option> | |
460 | <option value="eggs" selected>Eggs</option> | |
461 | <option value="bacon">Bacon</option> | |
462 | </select> | |
463 | <select id="second" multiple> | |
464 | <option value="spam">Spam</option> | |
465 | <option value="eggs">Eggs</option> | |
466 | <option value="bacon">Bacon</option> | |
467 | </select> | |
468 | <select id="third" multiple> | |
469 | <option value="spam">Spam</option> | |
470 | <option value="spam">Eggs</option> | |
471 | <option value="spam">Bacon</option> | |
472 | </select> | |
473 | ''' | |
474 | ||
475 | html5 = ''' | |
476 | <div> | |
477 | <input id="first" value="spam"> | |
478 | <input id="second" value="eggs"> | |
479 | <textarea id="third">bacon</textarea> | |
480 | </div> | |
481 | ''' | |
482 | ||
483 | def test_attr_empty_string(self): | |
484 | d = pq('<div>') | |
485 | d.attr('value', '') | |
486 | self.assertEqual(d.outer_html(), '<div value=""></div>') | |
487 | self.assertEqual(d.outer_html(method="xml"), '<div value=""/>') | |
488 | ||
391 | 489 | def test_remove(self): |
392 | 490 | d = pq(self.html) |
393 | 491 | d('img').remove() |
394 | 492 | val = d('a:first').html() |
395 | assert val == 'Test My link text', repr(val) | |
493 | assert val == 'TestMy link text', repr(val) | |
396 | 494 | val = d('a:last').html() |
397 | assert val == ' My link text 2', repr(val) | |
495 | assert val == 'My link text 2', repr(val) | |
398 | 496 | |
399 | 497 | def test_class(self): |
400 | 498 | d = pq('<div></div>') |
401 | 499 | d.removeClass('xx') |
402 | 500 | assert 'class' not in str(d), str(d) |
501 | ||
502 | def test_val_for_inputs(self): | |
503 | d = pq(self.html2) | |
504 | self.assertIsNone(d('input[name="none"]').val()) | |
505 | self.assertEqual(d('input[name="spam"]').val(), 'Spam') | |
506 | self.assertEqual(d('input[name="eggs"]').val(), 'Eggs') | |
507 | self.assertEqual(d('input:checkbox').val(), 'Bacon') | |
508 | self.assertEqual(d('input:radio').val(), 'Ham') | |
509 | d('input[name="spam"]').val('42') | |
510 | d('input[name="eggs"]').val('43') | |
511 | d('input:checkbox').val('44') | |
512 | d('input:radio').val('45') | |
513 | self.assertEqual(d('input[name="spam"]').val(), '42') | |
514 | self.assertEqual(d('input[name="eggs"]').val(), '43') | |
515 | self.assertEqual(d('input:checkbox').val(), '44') | |
516 | self.assertEqual(d('input:radio').val(), '45') | |
517 | ||
518 | def test_val_for_inputs_with_newline(self): | |
519 | d = pq(self.html2_newline) | |
520 | self.assertEqual(d('#newline-text').val(), 'Spam') | |
521 | self.assertEqual(d('#newline-radio').val(), 'S\npam') | |
522 | ||
523 | def test_val_for_textarea(self): | |
524 | d = pq(self.html3) | |
525 | self.assertEqual(d('#textarea-single').val(), 'Spam') | |
526 | self.assertEqual(d('#textarea-single').text(), 'Spam') | |
527 | d('#textarea-single').val('42') | |
528 | self.assertEqual(d('#textarea-single').val(), '42') | |
529 | # Note: jQuery still returns 'Spam' here. | |
530 | self.assertEqual(d('#textarea-single').text(), '42') | |
531 | ||
532 | multi_expected = '''Spam\n<b>Eggs</b>\nBacon''' | |
533 | self.assertEqual(d('#textarea-multi').val(), multi_expected) | |
534 | self.assertEqual(d('#textarea-multi').text(), multi_expected) | |
535 | multi_new = '''Bacon\n<b>Eggs</b>\nSpam''' | |
536 | multi_new_expected = '''Bacon\n<b>Eggs</b>\nSpam''' | |
537 | d('#textarea-multi').val(multi_new) | |
538 | self.assertEqual(d('#textarea-multi').val(), multi_new_expected) | |
539 | self.assertEqual(d('#textarea-multi').text(), multi_new_expected) | |
540 | ||
541 | def test_val_for_select(self): | |
542 | d = pq(self.html4) | |
543 | self.assertEqual(d('#first').val(), 'spam') | |
544 | self.assertEqual(d('#second').val(), 'eggs') | |
545 | self.assertIsNone(d('#third').val()) | |
546 | d('#first').val('eggs') | |
547 | d('#second').val('bacon') | |
548 | d('#third').val('eggs') # Selecting non-existing option. | |
549 | self.assertEqual(d('#first').val(), 'eggs') | |
550 | self.assertEqual(d('#second').val(), 'bacon') | |
551 | self.assertIsNone(d('#third').val()) | |
552 | d('#first').val('bacon') # Selecting non-existing option. | |
553 | self.assertEqual(d('#first').val(), 'spam') | |
554 | # Value set based on option order, not value order | |
555 | d('#second').val(['bacon', 'eggs']) | |
556 | self.assertEqual(d('#second').val(), 'eggs') | |
557 | d('#fourth').val(['spam']) | |
558 | self.assertEqual(d('#fourth').val(), 'spam') | |
559 | # Sets first option with matching value | |
560 | self.assertEqual(d('#fourth option[selected]').length, 1) | |
561 | self.assertEqual(d('#fourth option[selected]').text(), 'Spam') | |
562 | ||
563 | def test_val_for_select_multiple(self): | |
564 | d = pq(self.html6) | |
565 | self.assertEqual(d('#first').val(), ['spam', 'eggs']) | |
566 | # Selecting non-existing option. | |
567 | d('#first').val(['eggs', 'sausage', 'bacon']) | |
568 | self.assertEqual(d('#first').val(), ['eggs', 'bacon']) | |
569 | self.assertEqual(d('#second').val(), []) | |
570 | d('#second').val('eggs') | |
571 | self.assertEqual(d('#second').val(), ['eggs']) | |
572 | d('#second').val(['not spam', 'not eggs']) | |
573 | self.assertEqual(d('#second').val(), []) | |
574 | d('#third').val(['spam']) | |
575 | self.assertEqual(d('#third').val(), ['spam', 'spam', 'spam']) | |
576 | ||
577 | def test_val_for_input_and_textarea_given_array_value(self): | |
578 | d = pq('<input type="text">') | |
579 | d('input').val(['spam', 'eggs']) | |
580 | self.assertEqual(d('input').val(), 'spam,eggs') | |
581 | d = pq('<textarea></textarea>') | |
582 | d('textarea').val(['spam', 'eggs']) | |
583 | self.assertEqual(d('textarea').val(), 'spam,eggs') | |
584 | ||
585 | def test_val_for_multiple_elements(self): | |
586 | d = pq(self.html5) | |
587 | # "Get" returns *first* value. | |
588 | self.assertEqual(d('div > *').val(), 'spam') | |
589 | # "Set" updates *every* value. | |
590 | d('div > *').val('42') | |
591 | self.assertEqual(d('#first').val(), '42') | |
592 | self.assertEqual(d('#second').val(), '42') | |
593 | self.assertEqual(d('#third').val(), '42') | |
594 | ||
595 | def test_val_checkbox_no_value_attribute(self): | |
596 | d = pq('<input type="checkbox">') | |
597 | self.assertEqual(d.val(), 'on') | |
598 | d = pq('<input type="checkbox" value="">') | |
599 | self.assertEqual(d.val(), '') | |
600 | ||
601 | def test_val_radio_no_value_attribute(self): | |
602 | d = pq('<input type="radio">') | |
603 | self.assertEqual(d.val(), 'on') | |
604 | ||
605 | def test_val_value_is_empty_string(self): | |
606 | d = pq('<input value="">') | |
607 | self.assertEqual(d.val(), '') | |
608 | ||
609 | def test_val_input_has_no_value_attr(self): | |
610 | d = pq('<input>') | |
611 | self.assertEqual(d.val(), '') | |
612 | ||
613 | def test_html_replacement(self): | |
614 | html = '<div>Not Me<span>Replace Me</span>Not Me</div>' | |
615 | replacement = 'New <em>Contents</em> New' | |
616 | expected = html.replace('Replace Me', replacement) | |
617 | ||
618 | d = pq(html) | |
619 | d.find('span').html(replacement) | |
620 | ||
621 | new_html = d.outerHtml() | |
622 | self.assertEqual(new_html, expected) | |
623 | self.assertIn(replacement, new_html) | |
624 | ||
625 | def test_html_escape(self): | |
626 | inner_html = 'encoded <script> tag with "quotes".' \ | |
627 | '<span>nested <tag></span>' | |
628 | html = '<div>' + inner_html + '</div>' | |
629 | d = pq(html) | |
630 | self.assertEqual(d.html(), inner_html) | |
631 | ||
632 | ||
633 | class TestAjax(TestCase): | |
634 | ||
635 | html = ''' | |
636 | <div id="div"> | |
637 | <input form="dispersed" name="order" value="spam"> | |
638 | </div> | |
639 | <form id="dispersed"> | |
640 | <div><input name="order" value="eggs"></div> | |
641 | <input form="dispersed" name="order" value="ham"> | |
642 | <input form="other-form" name="order" value="nothing"> | |
643 | <input form="" name="order" value="nothing"> | |
644 | </form> | |
645 | <form id="other-form"> | |
646 | <input form="dispersed" name="order" value="tomato"> | |
647 | </form> | |
648 | <form class="no-id"> | |
649 | <input form="dispersed" name="order" value="baked beans"> | |
650 | <input name="spam" value="Spam"> | |
651 | </form> | |
652 | ''' | |
653 | ||
654 | html2 = ''' | |
655 | <form id="first"> | |
656 | <input name="order" value="spam"> | |
657 | <fieldset> | |
658 | <input name="fieldset" value="eggs"> | |
659 | <input id="input" name="fieldset" value="ham"> | |
660 | </fieldset> | |
661 | </form> | |
662 | <form id="datalist"> | |
663 | <datalist><div><input name="datalist" value="eggs"></div></datalist> | |
664 | <input type="checkbox" name="checkbox" checked> | |
665 | <input type="radio" name="radio" checked> | |
666 | </form> | |
667 | ''' | |
668 | ||
669 | html3 = ''' | |
670 | <form> | |
671 | <input name="order" value="spam"> | |
672 | <input id="noname" value="sausage"> | |
673 | <fieldset disabled> | |
674 | <input name="order" value="sausage"> | |
675 | </fieldset> | |
676 | <input name="disabled" value="ham" disabled> | |
677 | <input type="submit" name="submit" value="Submit"> | |
678 | <input type="button" name="button" value=""> | |
679 | <input type="image" name="image" value=""> | |
680 | <input type="reset" name="reset" value="Reset"> | |
681 | <input type="file" name="file" value=""> | |
682 | <button type="submit" name="submit" value="submit"></button> | |
683 | <input type="checkbox" name="spam"> | |
684 | <input type="radio" name="eggs"> | |
685 | </form> | |
686 | ''' | |
687 | ||
688 | html4 = ''' | |
689 | <form> | |
690 | <input name="spam" value="Spam/ | |
691 | spam"> | |
692 | <select name="order" multiple> | |
693 | <option value="baked | |
694 | beans" selected> | |
695 | <option value="tomato" selected> | |
696 | <option value="spam"> | |
697 | </select> | |
698 | <textarea name="multiline">multiple | |
699 | lines | |
700 | of text</textarea> | |
701 | </form> | |
702 | ''' | |
703 | ||
704 | def test_serialize_pairs_form_id(self): | |
705 | d = pq(self.html) | |
706 | self.assertEqual(d('#div').serialize_pairs(), []) | |
707 | self.assertEqual(d('#dispersed').serialize_pairs(), [ | |
708 | ('order', 'spam'), ('order', 'eggs'), ('order', 'ham'), | |
709 | ('order', 'tomato'), ('order', 'baked beans'), | |
710 | ]) | |
711 | self.assertEqual(d('.no-id').serialize_pairs(), [ | |
712 | ('spam', 'Spam'), | |
713 | ]) | |
714 | ||
715 | def test_serialize_pairs_form_controls(self): | |
716 | d = pq(self.html2) | |
717 | self.assertEqual(d('fieldset').serialize_pairs(), [ | |
718 | ('fieldset', 'eggs'), ('fieldset', 'ham'), | |
719 | ]) | |
720 | self.assertEqual(d('#input, fieldset, #first').serialize_pairs(), [ | |
721 | ('order', 'spam'), ('fieldset', 'eggs'), ('fieldset', 'ham'), | |
722 | ('fieldset', 'eggs'), ('fieldset', 'ham'), ('fieldset', 'ham'), | |
723 | ]) | |
724 | self.assertEqual(d('#datalist').serialize_pairs(), [ | |
725 | ('datalist', 'eggs'), ('checkbox', 'on'), ('radio', 'on'), | |
726 | ]) | |
727 | ||
728 | def test_serialize_pairs_filter_controls(self): | |
729 | d = pq(self.html3) | |
730 | self.assertEqual(d('form').serialize_pairs(), [ | |
731 | ('order', 'spam') | |
732 | ]) | |
733 | ||
734 | def test_serialize_pairs_form_values(self): | |
735 | d = pq(self.html4) | |
736 | self.assertEqual(d('form').serialize_pairs(), [ | |
737 | ('spam', 'Spam/spam'), ('order', 'baked\r\nbeans'), | |
738 | ('order', 'tomato'), ('multiline', 'multiple\r\nlines\r\nof text'), | |
739 | ]) | |
740 | ||
741 | def test_serialize_array(self): | |
742 | d = pq(self.html4) | |
743 | self.assertEqual(d('form').serialize_array(), [ | |
744 | {'name': 'spam', 'value': 'Spam/spam'}, | |
745 | {'name': 'order', 'value': 'baked\r\nbeans'}, | |
746 | {'name': 'order', 'value': 'tomato'}, | |
747 | {'name': 'multiline', 'value': 'multiple\r\nlines\r\nof text'}, | |
748 | ]) | |
749 | ||
750 | def test_serialize(self): | |
751 | d = pq(self.html4) | |
752 | self.assertEqual( | |
753 | d('form').serialize(), | |
754 | 'spam=Spam%2Fspam&order=baked%0D%0Abeans&order=tomato&' | |
755 | 'multiline=multiple%0D%0Alines%0D%0Aof%20text' | |
756 | ) | |
757 | ||
758 | def test_serialize_dict(self): | |
759 | d = pq(self.html4) | |
760 | self.assertEqual(d('form').serialize_dict(), { | |
761 | 'spam': 'Spam/spam', | |
762 | 'order': ['baked\r\nbeans', 'tomato'], | |
763 | 'multiline': 'multiple\r\nlines\r\nof text', | |
764 | }) | |
403 | 765 | |
404 | 766 | |
405 | 767 | class TestMakeLinks(TestCase): |
434 | 796 | self.assertRaises(etree.XMLSyntaxError, lambda: d.after(self.html)) |
435 | 797 | d = pq(self.xml, parser='html') |
436 | 798 | d.after(self.html) # this should not fail |
437 | ||
438 | @not_py3k | |
439 | def test_soup_parser(self): | |
440 | d = pq('<meta><head><title>Hello</head><body onload=crash()>Hi all<p>', | |
441 | parser='soup') | |
442 | self.assertEqual(str(d), ( | |
443 | '<html><meta/><head><title>Hello</title></head>' | |
444 | '<body onload="crash()">Hi all<p/></body></html>')) | |
445 | 799 | |
446 | 800 | def test_replaceWith(self): |
447 | 801 | expected = '''<div class="portlet"> |
471 | 825 | <foo xmlns:bar="http://example.com/bar"> |
472 | 826 | <bar:blah>What</bar:blah> |
473 | 827 | <idiot>123</idiot> |
828 | <baz xmlns="http://example.com/baz" a="b"> | |
829 | <subbaz/> | |
830 | </baz> | |
474 | 831 | </foo>''' |
475 | 832 | |
476 | 833 | xhtml = ''' |
480 | 837 | </body> |
481 | 838 | </html>''' |
482 | 839 | |
840 | namespaces = {'bar': 'http://example.com/bar', | |
841 | 'baz': 'http://example.com/baz'} | |
842 | ||
483 | 843 | def test_selector(self): |
484 | 844 | expected = 'What' |
485 | d = pq(b(self.xml), parser='xml') | |
845 | d = pq(self.xml.encode('utf8'), parser='xml') | |
486 | 846 | val = d('bar|blah', |
487 | namespaces={'bar': 'http://example.com/bar'}).text() | |
847 | namespaces=self.namespaces).text() | |
488 | 848 | self.assertEqual(repr(val), repr(expected)) |
489 | 849 | |
490 | 850 | def test_selector_with_xml(self): |
491 | 851 | expected = 'What' |
492 | d = pq('bar|blah', b(self.xml), parser='xml', | |
493 | namespaces={'bar': 'http://example.com/bar'}) | |
852 | d = pq('bar|blah', self.xml.encode('utf8'), parser='xml', | |
853 | namespaces=self.namespaces) | |
494 | 854 | val = d.text() |
495 | 855 | self.assertEqual(repr(val), repr(expected)) |
496 | 856 | |
502 | 862 | |
503 | 863 | def test_xhtml_namespace(self): |
504 | 864 | expected = 'What' |
505 | d = pq(b(self.xhtml), parser='xml') | |
865 | d = pq(self.xhtml.encode('utf8'), parser='xml') | |
506 | 866 | d.xhtml_to_html() |
507 | 867 | val = d('div').text() |
508 | 868 | self.assertEqual(repr(val), repr(expected)) |
516 | 876 | |
517 | 877 | def test_remove_namespaces(self): |
518 | 878 | expected = 'What' |
519 | d = pq(b(self.xml), parser='xml').remove_namespaces() | |
879 | d = pq(self.xml.encode('utf8'), parser='xml').remove_namespaces() | |
520 | 880 | val = d('blah').text() |
521 | 881 | self.assertEqual(repr(val), repr(expected)) |
882 | ||
883 | def test_persistent_namespaces(self): | |
884 | d = pq(self.xml.encode('utf8'), parser='xml', | |
885 | namespaces=self.namespaces) | |
886 | val = d('bar|blah').text() | |
887 | self.assertEqual(repr(val), repr('What')) | |
888 | ||
889 | def test_namespace_traversal(self): | |
890 | d = pq(self.xml.encode('utf8'), parser='xml', | |
891 | namespaces=self.namespaces) | |
892 | val = d('baz|subbaz').closest('baz|baz').attr('a') | |
893 | self.assertEqual(repr(val), repr('b')) | |
522 | 894 | |
523 | 895 | |
524 | 896 | class TestWebScrapping(TestCase): |
529 | 901 | self.application_url = self.s.application_url.rstrip('/') |
530 | 902 | |
531 | 903 | def test_get(self): |
532 | d = pq(self.application_url, {'q': 'foo'}, | |
904 | d = pq(url=self.application_url, data={'q': 'foo'}, | |
533 | 905 | method='get') |
534 | 906 | print(d) |
535 | 907 | self.assertIn('REQUEST_METHOD: GET', d('p').text()) |
536 | 908 | self.assertIn('q=foo', d('p').text()) |
537 | 909 | |
538 | 910 | def test_post(self): |
539 | d = pq(self.application_url, {'q': 'foo'}, | |
911 | d = pq(url=self.application_url, data={'q': 'foo'}, | |
540 | 912 | method='post') |
541 | 913 | self.assertIn('REQUEST_METHOD: POST', d('p').text()) |
542 | 914 | self.assertIn('q=foo', d('p').text()) |
543 | 915 | |
916 | def test_session(self): | |
917 | if HAS_REQUEST: | |
918 | import requests | |
919 | session = requests.Session() | |
920 | session.headers.update({'X-FOO': 'bar'}) | |
921 | d = pq(url=self.application_url, data={'q': 'foo'}, | |
922 | method='get', session=session) | |
923 | self.assertIn('HTTP_X_FOO: bar', d('p').text()) | |
924 | else: | |
925 | self.skipTest('no requests library') | |
926 | ||
544 | 927 | def tearDown(self): |
545 | 928 | self.s.shutdown() |
546 | 929 | |
548 | 931 | class TestWebScrappingEncoding(TestCase): |
549 | 932 | |
550 | 933 | def test_get(self): |
551 | if not HAS_REQUEST: | |
552 | return | |
553 | d = pq(u('http://ru.wikipedia.org/wiki/Заглавная_страница', 'utf8'), | |
934 | d = pq(url=u'http://ru.wikipedia.org/wiki/Заглавная_страница', | |
554 | 935 | method='get') |
555 | 936 | print(d) |
556 | self.assertEqual(d('#n-mainpage a').text(), | |
557 | u('Заглавная страница', 'utf8')) | |
937 | self.assertEqual(d('#pt-login').text(), u'Войти') | |
938 | ||
939 | ||
940 | class TestWebScrappingTimeouts(TestCase): | |
941 | ||
942 | def setUp(self): | |
943 | def app(environ, start_response): | |
944 | start_response('200 OK', [('Content-Type', 'text/plain')]) | |
945 | time.sleep(2) | |
946 | return [b'foobar\n'] | |
947 | self.s = http.StopableWSGIServer.create(app) | |
948 | self.s.wait() | |
949 | self.application_url = self.s.application_url.rstrip('/') | |
950 | ||
951 | def test_get(self): | |
952 | pq(url=self.application_url) | |
953 | with self.assertRaises(Exception): | |
954 | pq(url=self.application_url, timeout=1) | |
955 | ||
956 | def tearDown(self): | |
957 | self.s.shutdown() |
0 | import os | |
1 | import unittest | |
2 | from threading import Thread | |
3 | from time import sleep | |
4 | ||
5 | from .browser_base import TextExtractionMixin | |
6 | ||
7 | SELENIUM = 'MOZ_HEADLESS' in os.environ | |
8 | ||
9 | try: | |
10 | from selenium import webdriver | |
11 | from selenium.webdriver.firefox.options import Options | |
12 | except ImportError: | |
13 | SELENIUM = False | |
14 | ||
15 | if SELENIUM: | |
16 | from urllib.parse import urlunsplit | |
17 | from http.server import HTTPServer, BaseHTTPRequestHandler | |
18 | from queue import Queue | |
19 | ||
20 | class BaseTestRequestHandler(BaseHTTPRequestHandler): | |
21 | _last_html = '' | |
22 | ||
23 | def _get_last_html(self): | |
24 | q = self.server.html_queue | |
25 | while not q.empty(): | |
26 | self._last_html = q.get_nowait() | |
27 | return self._last_html | |
28 | ||
29 | def log_request(self, code='-', size='-'): | |
30 | pass | |
31 | ||
32 | def recv_from_testsuite(self, non_blocking=False): | |
33 | q = self.server.in_queue | |
34 | if non_blocking: | |
35 | return None if q.empty() else q.get_nowait() | |
36 | return q.get() | |
37 | ||
38 | def send_to_testsuite(self, value): | |
39 | self.server.out_queue.put(value) | |
40 | ||
41 | class HTMLSnippetSender(BaseTestRequestHandler): | |
42 | last_html = b'' | |
43 | ||
44 | def get_last_html(self): | |
45 | while True: | |
46 | value = self.recv_from_testsuite(non_blocking=True) | |
47 | if value is None: | |
48 | break | |
49 | self.last_html = value | |
50 | return self.last_html | |
51 | ||
52 | def do_GET(self): | |
53 | if self.path == '/': | |
54 | self.send_response(200) | |
55 | self.send_header('Content-Type', 'text/html; charset=utf-8') | |
56 | self.end_headers() | |
57 | self.wfile.write(self.get_last_html().encode('utf-8')) | |
58 | else: | |
59 | self.send_response(404) | |
60 | self.end_headers() | |
61 | ||
62 | class BaseBrowserTest(unittest.TestCase): | |
63 | LOCAL_IP = '127.0.0.1' | |
64 | PORT = 28546 | |
65 | # descendant of BaseBrowserTestRequestHandler | |
66 | REQUEST_HANDLER_CLASS = None | |
67 | ||
68 | @classmethod | |
69 | def setUpClass(cls): | |
70 | cls.to_server_queue = Queue() | |
71 | cls.from_server_queue = Queue() | |
72 | cls.server = HTTPServer((cls.LOCAL_IP, cls.PORT), | |
73 | cls.REQUEST_HANDLER_CLASS) | |
74 | cls.server.in_queue = cls.to_server_queue | |
75 | cls.server.out_queue = cls.from_server_queue | |
76 | cls.server_thread = Thread(target=cls.server.serve_forever) | |
77 | cls.server_thread.daemon = True | |
78 | cls.server_thread.start() | |
79 | options = Options() | |
80 | options.add_argument('-headless') | |
81 | cls.driver = webdriver.Firefox(options=options) | |
82 | sleep(1) | |
83 | ||
84 | @classmethod | |
85 | def tearDownClass(cls): | |
86 | cls.driver.quit() | |
87 | cls.server.shutdown() | |
88 | cls.server.server_close() | |
89 | ||
90 | def send_to_server(self, value): | |
91 | self.to_server_queue.put(value) | |
92 | ||
93 | def recv_from_server(self, non_blocking=False): | |
94 | q = self.from_server_queue | |
95 | if non_blocking: | |
96 | return None if q.empty() else q.get_nowait() | |
97 | return q.get() | |
98 | ||
99 | def open_url(self, path): | |
100 | self.driver.get(urlunsplit( | |
101 | ('http', '{}:{}'.format( | |
102 | self.LOCAL_IP, self.PORT), path, '', ''))) | |
103 | ||
104 | class TestInnerText(BaseBrowserTest, TextExtractionMixin): | |
105 | REQUEST_HANDLER_CLASS = HTMLSnippetSender | |
106 | ||
107 | def _simple_test(self, html, expected_sq, expected_nosq, **kwargs): | |
108 | self.send_to_server(html) | |
109 | self.open_url('/') | |
110 | ||
111 | selenium_text = self.driver.find_element_by_tag_name('body').text | |
112 | self.assertEqual(selenium_text, expected_sq) | |
113 | ||
114 | # inner_text = self.driver.execute_script( | |
115 | # 'return document.body.innerText') | |
116 | # text_content = self.driver.execute_script( | |
117 | # 'return document.body.textContent') |
0 | 0 | [tox] |
1 | envlist=py26,py27,py33,py34 | |
1 | envlist=py35,py36,py37,py38 | |
2 | 2 | |
3 | 3 | [testenv] |
4 | whitelist_externals= | |
5 | rm | |
6 | passenv= | |
7 | MOZ_HEADLESS | |
4 | 8 | commands = |
5 | {envbindir}/nosetests [] | |
9 | pytest [] | |
6 | 10 | deps = |
7 | cssselect>0.7.9 | |
8 | requests | |
9 | WebOb>1.1.9 | |
10 | WebTest | |
11 | nose | |
12 | coverage | |
13 | unittest2 | |
14 | BeautifulSoup | |
15 | restkit | |
11 | py38: selenium | |
12 | -e .[test] | |
16 | 13 | |
17 | [testenv:py33] | |
18 | changedir={toxinidir} | |
14 | [testenv:flake8] | |
15 | skipsdist=true | |
16 | skip_install=true | |
17 | basepython = python3.8 | |
19 | 18 | commands = |
20 | {envbindir}/nosetests [] | |
19 | flake8 pyquery tests | |
21 | 20 | deps = |
22 | cssselect>0.7.9 | |
23 | requests | |
24 | WebOb>1.1.9 | |
25 | WebTest | |
26 | nose | |
27 | coverage | |
21 | flake8 | |
28 | 22 | |
29 | [testenv:py34] | |
30 | changedir={toxinidir} | |
23 | [testenv:docs] | |
24 | skip_install=false | |
25 | skipsdist=true | |
26 | basepython = python3.8 | |
27 | changedir = docs | |
28 | deps = | |
29 | sphinx | |
30 | Pygments | |
31 | 31 | commands = |
32 | {envbindir}/nosetests [] | |
33 | deps = | |
34 | cssselect>0.7.9 | |
35 | requests | |
36 | WebOb>1.1.9 | |
37 | WebTest | |
38 | nose | |
39 | coverage | |
32 | rm -Rf {envtmpdir}/doctrees {envtmpdir}/html | |
33 | sphinx-build -b html -d {envtmpdir}/doctrees . {envtmpdir}/html | |
34 | ||
35 | # [testenv:selenium] | |
36 | # basepython = python3.5 | |
37 | # deps = | |
38 | # selenium | |
39 | # commands = | |
40 | # {envbindir}/python -m unittest seleniumtests.offline | |
41 | # {envbindir}/python -m unittest seleniumtests.browser |