Import upstream version 2.1.0+ds+git20210105.1.be8dd08
Debian Janitor
2 years ago
0 | s4cmd_env/ | |
1 | test-tmp/ | |
2 | ||
3 | *.py[co] | |
4 | ||
5 | # Packages | |
6 | *.egg | |
7 | *.egg-info | |
8 | dist | |
9 | build | |
10 | eggs | |
11 | parts | |
12 | bin | |
13 | var | |
14 | sdist | |
15 | develop-eggs | |
16 | .installed.cfg | |
17 | ||
18 | # Installer logs | |
19 | pip-log.txt | |
20 | ||
21 | # Unit test / coverage reports | |
22 | .coverage | |
23 | .tox | |
24 | ||
25 | #Translations | |
26 | *.mo | |
27 | ||
28 | #Mr Developer | |
29 | .mr.developer.cfg |
0 | language: python | |
1 | python: | |
2 | # - '2.6' Need to fix | |
3 | - '2.7' | |
4 | - '3.4' | |
5 | - '3.5' | |
6 | - '3.6' | |
7 | before_install: | |
8 | - sudo apt-get -qq update | |
9 | - sudo apt-get install -y tree | |
10 | install: | |
11 | - export PIPENV_IGNORE_VIRTUALENVS=1 | |
12 | - pip install pipenv --upgrade | |
13 | - pipenv install --dev --skip-lock | |
14 | script: | |
15 | - export BUILD_ID="$TRAVIS_BUILD_ID" | |
16 | - "pipenv run ./runtests.sh" | |
17 | env: | |
18 | global: | |
19 | - secure: irES1orNAMPZiahXTa7mTuBW1vRf2iV1VUowEp4kDRZL94/lxUsivnjNRc2735lHOfUPh2ZVMM9NFiYl9qMxN7LTHjG532pOW4XQPdMyySD1opgg8kbl9+zz9c2hCYXGpUl1sn2IMgohQa+WD71LOyiX8jUoXVio38KtDe4ZB4M= | |
20 | - secure: Pf/z0k5E5e5d2KPKUKOxV6sXWv5vggRrswTZiN8mu9fv3QDpwKXQ3tBcTjmwRt76I6G8Vg1ugNcZRY4I0Dk0wmirM1KRxmRXSXFTtT89vrqezg1GeAy3Ffu7DPb0pH858om2QvVDAcLcMsqiXq/Hn62bjig+dd2MLbdWg2DhYvU= | |
21 | - secure: OQkYDl8R5cHl2HJrEhSFycSGybF9vdL19tG591rlLuKE/+WgRVa486qexlceWr1AkTjNawLLOhaomxnuCVzBO/sFdA1XdeCHQRCOXytfCJEHEYNNKcjLVYIw+roAOVeBHWQq7F4loyBDS1sVS6gb2rOsyb/wLCy3hJQ/gJQUhbY= |
0 | # CHANGELOG | |
1 | ||
2 | #### v2.1.0 | |
3 | ||
4 | - Added `--endpoint_url` flag to allow s4cmd to work with non-s3 object storage services ([#82](https://github.com/bloomreach/s4cmd/pull/82)) | |
5 | - Fix bug in pip install ([#102](https://github.com/bloomreach/s4cmd/pull/102)) | |
6 | - Fix bug which was leading to errors on zero length files ([#81](https://github.com/bloomreach/s4cmd/pull/81)) | |
7 | - Add flag `--version` to display s4cmd version | |
8 | - Check added to ensure consistency of `os.write` in method `write_file_chunk` | |
9 | - Full E2E test-suite running on python 2 and 3, backed by Travis-CI | |
10 | ||
11 | #### v2.0.1 | |
12 | ||
13 | - Merge change from @rameshrajagopal for S3 keys in command-line parameters. | |
14 | ||
15 | #### v2.0.0 | |
16 | ||
17 | - Fully migrated from old boto 2.x to new boto3 library. | |
18 | - Support S3 pass through APIs. | |
19 | - Support batch delete (with delete_objects API). | |
20 | - Support S4CMD_OPTS environment variable. | |
21 | - Support moving files larger than 5GB with multipart upload. | |
22 | - Support timestamp filtering with --last-modified-before and --last-modified-after options. | |
23 | - Faster upload with lazy evaluation of md5 hash. | |
24 | - Listing a large number of files with S3 pagination, with memory as the only limit. | |
25 | - New directory to directory dsync command to replace old sync command. | |
26 | ||
27 | #### v1.5.23 | |
28 | ||
29 | - Add bash command line completion | |
30 | ||
31 | #### v1.5.22 | |
32 | ||
33 | - Add compatibility for Python3 | |
34 | ||
35 | #### v1.5.21 | |
36 | ||
37 | - Merge changes from linsomniac@github for better argument parsing | |
38 | ||
39 | #### v1.5.20 | |
40 | ||
41 | - Merge change from oniltonmaciel@github for arguments for multi-part upload. | |
42 | - Fix setup.py for module and command line tool | |
43 | ||
44 | #### v1.5.19 | |
45 | ||
46 | - Set socket.setdefaulttimeout() to prevent boto/s3 socket read block in httplib. | |
47 | ||
48 | #### v1.5.18 | |
49 | ||
50 | - Use validate=self.opt.validate to prevent extraneous list API calls. | |
51 | ||
52 | #### v1.5.17 | |
53 | ||
54 | - Check file size consistency after download; will retry the download if inconsistent. | |
55 | ||
56 | #### v1.5.16 | |
57 | ||
58 | - Disable consecutive slashes removal. | |
59 | ||
60 | #### v1.5.15 | |
61 | ||
62 | - Close http connection cleanly after thread pool execution. | |
63 | ||
64 | #### v1.5.14 | |
65 | ||
66 | - Copy file privileges. If s4cmd sync is used, then it only update privileges of files when their signatures are different | |
67 | ||
68 | #### v1.5.13 | |
69 | ||
70 | - Also retry S3ResponseError exceptions. | |
71 | ||
72 | #### v1.5.12 | |
73 | ||
74 | - Add RetryFailure class to unknown network failures. | |
75 | ||
76 | #### v1.5.11 | |
77 | ||
78 | - Fix atomic write issue for small files calling boto API directly. | |
79 | - Add code to cleanup temp files. | |
80 | - Fix a bug where pretty_print calls message() without format. | |
81 | ||
82 | #### v1.5.10 | |
83 | ||
84 | - Fix options global variable bug | |
85 | ||
86 | #### v1.5.9 | |
87 | ||
88 | - Open source licensing. | |
89 | ||
90 | #### v1.5.8 | |
91 | ||
92 | - Fix the initialization of Options class. | |
93 | ||
94 | #### v1.5.7 | |
95 | ||
96 | - Fix multi-threading race condition with os.makedirs call | |
97 | ||
98 | #### v1.5.6 | |
99 | ||
100 | - Fix s4cmd get/sync error with --ignore-empty-source for empty source | |
101 | ||
102 | #### v1.5.5 | |
103 | ||
104 | - Implement environment variable S4CMD_NUM_THREADS to change the default | |
105 | number of threads. | |
106 | ||
107 | #### v1.5.4 | |
108 | ||
109 | - Implement --ignore-empty-source parameter for backward compatibility. | |
110 | ||
111 | #### v1.5.3 | |
112 | ||
113 | - Implement du and _totalsize command. | |
114 | ||
115 | #### v1.5.2 | |
116 | ||
117 | - Read keys from environment variable or s3cfg. | |
118 | - Implement mv command | |
119 | ||
120 | #### v1.5.1 | |
121 | ||
122 | - Fix the bug that recursive S3 walk wrongly checks the prefix. | |
123 | - Add more tests. | |
124 | - Fix md5 etag (with double quote) checking bug. | |
125 | ||
126 | #### v1.5 | |
127 | ||
128 | - Allow wildcards with recursive mode. | |
129 | - Support -d option for ls command. | |
130 | ||
131 | #### v1.0.2 | |
132 | ||
133 | - Fix the problem of get/put/sync directories. | |
134 | - Fix the wildcard check for sync command. | |
135 | - Temporarily avoid multipart upload for files smaller than 4.5G | |
136 | - Stop showing progress if output is not connected to tty. | |
137 | ||
138 | #### v1.0.1 | |
139 | ||
140 | - Fixed wrong directory created by cp command with a single file. | |
141 | - Fixed wrong directory discovery with a single child directory. | |
142 |
0 | ||
1 | Apache License | |
2 | Version 2.0, January 2004 | |
3 | http://www.apache.org/licenses/ | |
4 | ||
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION | |
6 | ||
7 | 1. Definitions. | |
8 | ||
9 | "License" shall mean the terms and conditions for use, reproduction, | |
10 | and distribution as defined by Sections 1 through 9 of this document. | |
11 | ||
12 | "Licensor" shall mean the copyright owner or entity authorized by | |
13 | the copyright owner that is granting the License. | |
14 | ||
15 | "Legal Entity" shall mean the union of the acting entity and all | |
16 | other entities that control, are controlled by, or are under common | |
17 | control with that entity. For the purposes of this definition, | |
18 | "control" means (i) the power, direct or indirect, to cause the | |
19 | direction or management of such entity, whether by contract or | |
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the | |
21 | outstanding shares, or (iii) beneficial ownership of such entity. | |
22 | ||
23 | "You" (or "Your") shall mean an individual or Legal Entity | |
24 | exercising permissions granted by this License. | |
25 | ||
26 | "Source" form shall mean the preferred form for making modifications, | |
27 | including but not limited to software source code, documentation | |
28 | source, and configuration files. | |
29 | ||
30 | "Object" form shall mean any form resulting from mechanical | |
31 | transformation or translation of a Source form, including but | |
32 | not limited to compiled object code, generated documentation, | |
33 | and conversions to other media types. | |
34 | ||
35 | "Work" shall mean the work of authorship, whether in Source or | |
36 | Object form, made available under the License, as indicated by a | |
37 | copyright notice that is included in or attached to the work | |
38 | (an example is provided in the Appendix below). | |
39 | ||
40 | "Derivative Works" shall mean any work, whether in Source or Object | |
41 | form, that is based on (or derived from) the Work and for which the | |
42 | editorial revisions, annotations, elaborations, or other modifications | |
43 | represent, as a whole, an original work of authorship. For the purposes | |
44 | of this License, Derivative Works shall not include works that remain | |
45 | separable from, or merely link (or bind by name) to the interfaces of, | |
46 | the Work and Derivative Works thereof. | |
47 | ||
48 | "Contribution" shall mean any work of authorship, including | |
49 | the original version of the Work and any modifications or additions | |
50 | to that Work or Derivative Works thereof, that is intentionally | |
51 | submitted to Licensor for inclusion in the Work by the copyright owner | |
52 | or by an individual or Legal Entity authorized to submit on behalf of | |
53 | the copyright owner. For the purposes of this definition, "submitted" | |
54 | means any form of electronic, verbal, or written communication sent | |
55 | to the Licensor or its representatives, including but not limited to | |
56 | communication on electronic mailing lists, source code control systems, | |
57 | and issue tracking systems that are managed by, or on behalf of, the | |
58 | Licensor for the purpose of discussing and improving the Work, but | |
59 | excluding communication that is conspicuously marked or otherwise | |
60 | designated in writing by the copyright owner as "Not a Contribution." | |
61 | ||
62 | "Contributor" shall mean Licensor and any individual or Legal Entity | |
63 | on behalf of whom a Contribution has been received by Licensor and | |
64 | subsequently incorporated within the Work. | |
65 | ||
66 | 2. Grant of Copyright License. Subject to the terms and conditions of | |
67 | this License, each Contributor hereby grants to You a perpetual, | |
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable | |
69 | copyright license to reproduce, prepare Derivative Works of, | |
70 | publicly display, publicly perform, sublicense, and distribute the | |
71 | Work and such Derivative Works in Source or Object form. | |
72 | ||
73 | 3. Grant of Patent License. Subject to the terms and conditions of | |
74 | this License, each Contributor hereby grants to You a perpetual, | |
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable | |
76 | (except as stated in this section) patent license to make, have made, | |
77 | use, offer to sell, sell, import, and otherwise transfer the Work, | |
78 | where such license applies only to those patent claims licensable | |
79 | by such Contributor that are necessarily infringed by their | |
80 | Contribution(s) alone or by combination of their Contribution(s) | |
81 | with the Work to which such Contribution(s) was submitted. If You | |
82 | institute patent litigation against any entity (including a | |
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work | |
84 | or a Contribution incorporated within the Work constitutes direct | |
85 | or contributory patent infringement, then any patent licenses | |
86 | granted to You under this License for that Work shall terminate | |
87 | as of the date such litigation is filed. | |
88 | ||
89 | 4. Redistribution. You may reproduce and distribute copies of the | |
90 | Work or Derivative Works thereof in any medium, with or without | |
91 | modifications, and in Source or Object form, provided that You | |
92 | meet the following conditions: | |
93 | ||
94 | (a) You must give any other recipients of the Work or | |
95 | Derivative Works a copy of this License; and | |
96 | ||
97 | (b) You must cause any modified files to carry prominent notices | |
98 | stating that You changed the files; and | |
99 | ||
100 | (c) You must retain, in the Source form of any Derivative Works | |
101 | that You distribute, all copyright, patent, trademark, and | |
102 | attribution notices from the Source form of the Work, | |
103 | excluding those notices that do not pertain to any part of | |
104 | the Derivative Works; and | |
105 | ||
106 | (d) If the Work includes a "NOTICE" text file as part of its | |
107 | distribution, then any Derivative Works that You distribute must | |
108 | include a readable copy of the attribution notices contained | |
109 | within such NOTICE file, excluding those notices that do not | |
110 | pertain to any part of the Derivative Works, in at least one | |
111 | of the following places: within a NOTICE text file distributed | |
112 | as part of the Derivative Works; within the Source form or | |
113 | documentation, if provided along with the Derivative Works; or, | |
114 | within a display generated by the Derivative Works, if and | |
115 | wherever such third-party notices normally appear. The contents | |
116 | of the NOTICE file are for informational purposes only and | |
117 | do not modify the License. You may add Your own attribution | |
118 | notices within Derivative Works that You distribute, alongside | |
119 | or as an addendum to the NOTICE text from the Work, provided | |
120 | that such additional attribution notices cannot be construed | |
121 | as modifying the License. | |
122 | ||
123 | You may add Your own copyright statement to Your modifications and | |
124 | may provide additional or different license terms and conditions | |
125 | for use, reproduction, or distribution of Your modifications, or | |
126 | for any such Derivative Works as a whole, provided Your use, | |
127 | reproduction, and distribution of the Work otherwise complies with | |
128 | the conditions stated in this License. | |
129 | ||
130 | 5. Submission of Contributions. Unless You explicitly state otherwise, | |
131 | any Contribution intentionally submitted for inclusion in the Work | |
132 | by You to the Licensor shall be under the terms and conditions of | |
133 | this License, without any additional terms or conditions. | |
134 | Notwithstanding the above, nothing herein shall supersede or modify | |
135 | the terms of any separate license agreement you may have executed | |
136 | with Licensor regarding such Contributions. | |
137 | ||
138 | 6. Trademarks. This License does not grant permission to use the trade | |
139 | names, trademarks, service marks, or product names of the Licensor, | |
140 | except as required for reasonable and customary use in describing the | |
141 | origin of the Work and reproducing the content of the NOTICE file. | |
142 | ||
143 | 7. Disclaimer of Warranty. Unless required by applicable law or | |
144 | agreed to in writing, Licensor provides the Work (and each | |
145 | Contributor provides its Contributions) on an "AS IS" BASIS, | |
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |
147 | implied, including, without limitation, any warranties or conditions | |
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A | |
149 | PARTICULAR PURPOSE. You are solely responsible for determining the | |
150 | appropriateness of using or redistributing the Work and assume any | |
151 | risks associated with Your exercise of permissions under this License. | |
152 | ||
153 | 8. Limitation of Liability. In no event and under no legal theory, | |
154 | whether in tort (including negligence), contract, or otherwise, | |
155 | unless required by applicable law (such as deliberate and grossly | |
156 | negligent acts) or agreed to in writing, shall any Contributor be | |
157 | liable to You for damages, including any direct, indirect, special, | |
158 | incidental, or consequential damages of any character arising as a | |
159 | result of this License or out of the use or inability to use the | |
160 | Work (including but not limited to damages for loss of goodwill, | |
161 | work stoppage, computer failure or malfunction, or any and all | |
162 | other commercial damages or losses), even if such Contributor | |
163 | has been advised of the possibility of such damages. | |
164 | ||
165 | 9. Accepting Warranty or Additional Liability. While redistributing | |
166 | the Work or Derivative Works thereof, You may choose to offer, | |
167 | and charge a fee for, acceptance of support, warranty, indemnity, | |
168 | or other liability obligations and/or rights consistent with this | |
169 | License. However, in accepting such obligations, You may act only | |
170 | on Your own behalf and on Your sole responsibility, not on behalf | |
171 | of any other Contributor, and only if You agree to indemnify, | |
172 | defend, and hold each Contributor harmless for any liability | |
173 | incurred by, or claims asserted against, such Contributor by reason | |
174 | of your accepting any such warranty or additional liability. | |
175 | ||
176 | END OF TERMS AND CONDITIONS | |
177 | ||
178 | APPENDIX: How to apply the Apache License to your work. | |
179 | ||
180 | To apply the Apache License to your work, attach the following | |
181 | boilerplate notice, with the fields enclosed by brackets "[]" | |
182 | replaced with your own identifying information. (Don't include | |
183 | the brackets!) The text should be enclosed in the appropriate | |
184 | comment syntax for the file format. We also recommend that a | |
185 | file or class name and description of purpose be included on the | |
186 | same "printed page" as the copyright notice for easier | |
187 | identification within third-party archives. | |
188 | ||
189 | Copyright [yyyy] [name of copyright owner] | |
190 | ||
191 | Licensed under the Apache License, Version 2.0 (the "License"); | |
192 | you may not use this file except in compliance with the License. | |
193 | You may obtain a copy of the License at | |
194 | ||
195 | http://www.apache.org/licenses/LICENSE-2.0 | |
196 | ||
197 | Unless required by applicable law or agreed to in writing, software | |
198 | distributed under the License is distributed on an "AS IS" BASIS, | |
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
200 | See the License for the specific language governing permissions and | |
201 | limitations under the License. |
0 | Metadata-Version: 2.1 | |
1 | Name: s4cmd | |
2 | Version: 2.1.0 | |
3 | Summary: Super S3 command line tool | |
4 | Home-page: https://github.com/bloomreach/s4cmd | |
5 | Author: Chou-han Yang | |
6 | License: http://www.apache.org/licenses/LICENSE-2.0 | |
7 | Description: # s4cmd | |
8 | ### Super S3 command line tool | |
9 | [![Build Status](https://travis-ci.com/bloomreach/s4cmd.svg?branch=master)](https://travis-ci.com/bloomreach/s4cmd) [![Join the chat at https://gitter.im/bloomreach/s4cmd](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/bloomreach/s4cmd?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | |
10 | ||
11 | ---- | |
12 | ||
13 | **Author**: Chou-han Yang ([@chouhanyang](https://github.com/chouhanyang)) | |
14 | ||
15 | **Current Maintainers**: Debodirno Chandra ([@debodirno](https://github.com/debodirno)) | Naveen Vardhi ([@rozuur](https://github.com/rozuur)) | Navin Pai ([@navinpai](https://github.com/navinpai)) | |
16 | ||
17 | ---- | |
18 | ||
19 | ## What's New in s4cmd 2.x | |
20 | ||
21 | - Fully migrated from old boto 2.x to new [boto3](http://boto3.readthedocs.io/en/latest/reference/services/s3.html) library, which provides more reliable and up-to-date S3 backend. | |
22 | - Support S3 `--API-ServerSideEncryption` along with **36 new API pass-through options**. See API pass-through options section for complete list. | |
23 | - Support batch delete (with delete_objects API) to delete up to 1000 files with single call. **100+ times faster** than sequential deletion. | |
24 | - Support `S4CMD_OPTS` environment variable for commonly used options such as `--API-ServerSideEncryption` across all your s4cmd operations. | |
25 | - Support moving files **larger than 5GB** with multipart upload. **20+ times faster** than sequential move operation when moving large files. | |
26 | - Support timestamp filtering with `--last-modified-before` and `--last-modified-after` options for all operations. Human friendly timestamps are supported, e.g. `--last-modified-before='2 months ago'` | |
27 | - Faster upload with lazy evaluation of md5 hash. | |
28 | - Listing a large number of files with S3 pagination, with memory as the only limit. | |
29 | - New directory to directory `dsync` command is a better, standalone implementation to replace the old `sync` command, which is implemented on top of the get/put/mv commands. `--delete-removed` works for all cases including local to s3, s3 to local, and s3 to s3. `sync` command preserves the old behavior in this version for compatibility. | |
30 | - [Support for S3 compatible storage services](https://github.com/bloomreach/s4cmd/issues/52) such as DreamHost and Cloudian using `--endpoint-url` (Community Supported Beta Feature). | |
31 | - Tested on both Python 2.7, 3.6, 3.7, 3.8, 3.9 and nightly. | |
32 | - Special thanks to [onera.com](http://www.onera.com) for supporting s4cmd. | |
33 | ||
34 | ||
35 | ## Motivation | |
36 | ||
37 | S4cmd is a command-line utility for accessing | |
38 | [Amazon S3](http://en.wikipedia.org/wiki/Amazon_S3), inspired by | |
39 | [s3cmd](http://s3tools.org/s3cmd). | |
40 | ||
41 | We have used s3cmd heavily for a number of scripted, data-intensive | |
42 | applications. However as the need for a variety of small improvements arose, we | |
43 | created our own implementation, s4cmd. It is intended as an alternative to | |
44 | s3cmd for enhanced performance and for large files, and with a number of | |
45 | additional features and fixes that we have found useful. | |
46 | ||
47 | It strives to be compatible with the most common usage scenarios for s3cmd. It | |
48 | does not offer exact drop-in compatibility, due to a number of corner cases where | |
49 | different behavior seems preferable, or for bugfixes. | |
50 | ||
51 | ||
52 | ## Features | |
53 | ||
54 | S4cmd supports the regular commands you might expect for fetching and storing | |
55 | files in S3: `ls`, `put`, `get`, `cp`, `mv`, `sync`, `del`, `du`. | |
56 | ||
57 | The main features that distinguish s4cmd are: | |
58 | ||
59 | - Simple (less than 1500 lines of code) and implemented in pure Python, based | |
60 | on the widely used [Boto3](https://github.com/boto/boto3) library. | |
61 | - Multi-threaded/multi-connection implementation for enhanced performance on all | |
62 | commands. As with many network-intensive applications (like web browsers), | |
63 | accessing S3 in a single-threaded way is often significantly less efficient than | |
64 | having multiple connections actively transferring data at once. In general, we | |
65 | get a 2X boost to upload/download speeds from this. | |
66 | - Path handling: S3 is not a traditional filesystem with built-in support for | |
67 | directory structure: internally, there are only objects, not directories or | |
68 | folders. However, most people use S3 in a hierarchical structure, with paths | |
69 | separated by slashes, to emulate traditional filesystems. S4cmd follows | |
70 | conventions to more closely replicate the behavior of traditional filesystems | |
71 | in certain corner cases. For example, "ls" and "cp" work much like in Unix | |
72 | shells, to avoid odd surprises. (For examples see compatibility notes below.) | |
73 | - Wildcard support: Wildcards, including multiple levels of wildcards, like in | |
74 | Unix shells, are handled. For example: | |
75 | s3://my-bucket/my-folder/20120512/*/*chunk00?1? | |
76 | - Automatic retry: Failure tasks will be executed again after a delay. | |
77 | - Multi-part upload support for files larger than 5GB. | |
78 | - Handling of MD5s properly with respect to multi-part uploads (for the sordid | |
79 | details of this, see below). | |
80 | - Miscellaneous enhancements and bugfixes: | |
81 | - Partial file creation: Avoid creating empty target files if source does not | |
82 | exist. Avoid creating partial output files when commands are interrupted. | |
83 | - General thread safety: Tool can be interrupted or killed at any time without | |
84 | being blocked by child threads or leaving incomplete or corrupt files in | |
85 | place. | |
86 | - Ensure exit code is nonzero on all failure scenarios (a very important | |
87 | feature in scripts). | |
88 | - Expected handling of symlinks (they are followed). | |
89 | - Support both `s3://` and `s3n://` prefixes (the latter is common with | |
90 | Amazon Elastic Mapreduce). | |
91 | ||
92 | Limitations: | |
93 | ||
94 | - No CloudFront or other feature support. | |
95 | - Currently, we simulate `sync` with `get` and `put` with `--recursive --force --sync-check`. | |
96 | ||
97 | ||
98 | ## Installation and Setup | |
99 | You can install `s4cmd` from [PyPI](https://pypi.python.org/pypi/s4cmd). | |
100 | ||
101 | ``` | |
102 | pip install s4cmd | |
103 | ``` | |
104 | ||
105 | - Copy or create a symbolic link so you can run `s4cmd.py` as `s4cmd`. (It is just | |
106 | a single file!) | |
107 | - If you already have a `~/.s3cfg` file from configuring `s3cmd`, credentials | |
108 | from this file will be used. Otherwise, set the `S3_ACCESS_KEY` and | |
109 | `S3_SECRET_KEY` environment variables to contain your S3 credentials. | |
110 | - If no keys are provided, but an IAM role is associated with the EC2 instance, it will | |
111 | be used transparently. | |
112 | ||
113 | ||
114 | ## s4cmd Commands | |
115 | ||
116 | #### `s4cmd ls [path]` | |
117 | ||
118 | List all contents of a directory. | |
119 | ||
120 | * -r/--recursive: recursively display all contents including subdirectories under the given path. | |
121 | * -d/--show-directory: show the directory entry instead of its content. | |
122 | ||
123 | ||
124 | #### `s4cmd put [source] [target]` | |
125 | ||
126 | Upload local files up to S3. | |
127 | ||
128 | * -r/--recursive: also upload directories recursively. | |
129 | * -s/--sync-check: check md5 hash to avoid uploading the same content. | |
130 | * -f/--force: override existing file instead of showing error message. | |
131 | * -n/--dry-run: emulate the operation without real upload. | |
132 | ||
133 | #### `s4cmd get [source] [target]` | |
134 | ||
135 | Download files from S3 to local filesystem. | |
136 | ||
137 | * -r/--recursive: also download directories recursively. | |
138 | * -s/--sync-check: check md5 hash to avoid downloading the same content. | |
139 | * -f/--force: override existing file instead of showing error message. | |
140 | * -n/--dry-run: emulate the operation without real download. | |
141 | ||
142 | ||
143 | #### `s4cmd dsync [source dir] [target dir]` | |
144 | ||
145 | Synchronize the contents of two directories. The directory can either be local or remote, but currently, it doesn't support two local directories. | |
146 | ||
147 | * -r/--recursive: also sync directories recursively. | |
148 | * -s/--sync-check: check md5 hash to avoid syncing the same content. | |
149 | * -f/--force: override existing file instead of showing error message. | |
150 | * -n/--dry-run: emulate the operation without real sync. | |
151 | * --delete-removed: delete files not in source directory. | |
152 | ||
153 | #### `s4cmd sync [source] [target]` | |
154 | ||
155 | (Obsolete, use `dsync` instead) Synchronize the contents of two directories. The directory can either be local or remote, but currently, it doesn't support two local directories. This command simply invokes the get/put/mv commands. | |
156 | ||
157 | * -r/--recursive: also sync directories recursively. | |
158 | * -s/--sync-check: check md5 hash to avoid syncing the same content. | |
159 | * -f/--force: override existing file instead of showing error message. | |
160 | * -n/--dry-run: emulate the operation without real sync. | |
161 | * --delete-removed: delete files not in source directory. Only works when syncing local directory to s3 directory. | |
162 | ||
163 | #### `s4cmd cp [source] [target]` | |
164 | ||
165 | Copy a file or a directory from a S3 location to another. | |
166 | ||
167 | * -r/--recursive: also copy directories recursively. | |
168 | * -s/--sync-check: check md5 hash to avoid copying the same content. | |
169 | * -f/--force: override existing file instead of showing error message. | |
170 | * -n/--dry-run: emulate the operation without real copy. | |
171 | ||
172 | #### `s4cmd mv [source] [target]` | |
173 | ||
174 | Move a file or a directory from a S3 location to another. | |
175 | ||
176 | * -r/--recursive: also move directories recursively. | |
177 | * -s/--sync-check: check md5 hash to avoid moving the same content. | |
178 | * -f/--force: override existing file instead of showing error message. | |
179 | * -n/--dry-run: emulate the operation without real move. | |
180 | ||
181 | #### `s4cmd del [path]` | |
182 | ||
183 | Delete files or directories on S3. | |
184 | ||
185 | * -r/--recursive: also delete directories recursively. | |
186 | * -n/--dry-run: emulate the operation without real delete. | |
187 | ||
188 | #### `s4cmd du [path]` | |
189 | ||
190 | Get the size of the given directory. | |
191 | ||
192 | Available parameters: | |
193 | ||
194 | * -r/--recursive: also add sizes of sub-directories recursively. | |
195 | ||
196 | ## s4cmd Control Options | |
197 | ||
198 | ##### `-p S3CFG, --config=[filename]` | |
199 | path to s3cfg config file | |
200 | ||
201 | ##### `-f, --force` | |
202 | force overwrite files when download or upload | |
203 | ||
204 | ##### `-r, --recursive` | |
205 | recursively checking subdirectories | |
206 | ||
207 | ##### `-s, --sync-check` | |
208 | check file md5 before download or upload | |
209 | ||
210 | ##### `-n, --dry-run` | |
211 | trial run without actual download or upload | |
212 | ||
213 | ##### `-t RETRY, --retry=[integer]` | |
214 | number of retries before giving up | |
215 | ||
216 | ##### `--retry-delay=[integer]` | |
217 | seconds to sleep between retries | |
218 | ||
219 | ##### `-c NUM_THREADS, --num-threads=NUM_THREADS` | |
220 | number of concurrent threads | |
221 | ||
222 | ##### `--endpoint-url` | |
223 | endpoint url used in boto3 client | |
224 | ||
225 | ##### `-d, --show-directory` | |
226 | show directory instead of its content | |
227 | ||
228 | ##### `--ignore-empty-source` | |
229 | ignore empty source from s3 | |
230 | ||
231 | ##### `--use-ssl` | |
232 | (obsolete) use SSL connection to S3 | |
233 | ||
234 | ##### `--verbose` | |
235 | verbose output | |
236 | ||
237 | ##### `--debug` | |
238 | debug output | |
239 | ||
240 | ##### `--validate` | |
241 | (obsolete) validate lookup operation | |
242 | ||
243 | ##### `-D, --delete-removed` | |
244 | delete remote files that do not exist in source after sync | |
245 | ||
246 | ##### `--multipart-split-size=[integer]` | |
247 | size in bytes to split multipart transfers | |
248 | ||
249 | ##### `--max-singlepart-download-size=[integer]` | |
250 | files with size (in bytes) greater than this will be | |
251 | downloaded in multipart transfers | |
252 | ||
253 | ##### `--max-singlepart-upload-size=[integer]` | |
254 | files with size (in bytes) greater than this will be | |
255 | uploaded in multipart transfers | |
256 | ||
257 | ##### `--max-singlepart-copy-size=[integer]` | |
258 | files with size (in bytes) greater than this will be | |
259 | copied in multipart transfers | |
260 | ||
261 | ##### `--batch-delete-size=[integer]` | |
262 | Number of files (<1000) to be combined in batch delete. | |
263 | ||
264 | ##### `--last-modified-before=[datetime]` | |
265 | Condition on files where their last modified dates are | |
266 | before given parameter. | |
267 | ||
268 | ##### `--last-modified-after=[datetime]` | |
269 | Condition on files where their last modified dates are | |
270 | after given parameter. | |
271 | ||
272 | ||
273 | ## S3 API Pass-through Options | |
274 | ||
These options are translated directly into boto3 API calls. Each option is passed only to the APIs that accept it as a parameter. For example, `--API-ServerSideEncryption` applies to `put_object` and `create_multipart_upload`, but not to `list_buckets` or `get_objects`. Therefore, providing `--API-ServerSideEncryption` to `s4cmd ls` has no effect.
276 | ||
277 | For more information, please see boto3 s3 documentations http://boto3.readthedocs.io/en/latest/reference/services/s3.html | |
278 | ||
279 | ##### `--API-ACL=[string]` | |
280 | The canned ACL to apply to the object. | |
281 | ||
282 | ##### `--API-CacheControl=[string]` | |
283 | Specifies caching behavior along the request/reply chain. | |
284 | ||
285 | ##### `--API-ContentDisposition=[string]` | |
286 | Specifies presentational information for the object. | |
287 | ||
288 | ##### `--API-ContentEncoding=[string]` | |
289 | Specifies what content encodings have been applied to the object and thus what decoding mechanisms must be applied to obtain the media-type referenced by the Content-Type header field. | |
290 | ||
291 | ##### `--API-ContentLanguage=[string]` | |
292 | The language the content is in. | |
293 | ||
294 | ##### `--API-ContentMD5=[string]` | |
295 | The base64-encoded 128-bit MD5 digest of the part data. | |
296 | ||
297 | ##### `--API-ContentType=[string]` | |
298 | A standard MIME type describing the format of the object data. | |
299 | ||
300 | ##### `--API-CopySourceIfMatch=[string]` | |
301 | Copies the object if its entity tag (ETag) matches the specified tag. | |
302 | ||
303 | ##### `--API-CopySourceIfModifiedSince=[datetime]` | |
304 | Copies the object if it has been modified since the specified time. | |
305 | ||
306 | ##### `--API-CopySourceIfNoneMatch=[string]` | |
307 | Copies the object if its entity tag (ETag) is different than the specified ETag. | |
308 | ||
309 | ##### `--API-CopySourceIfUnmodifiedSince=[datetime]` | |
310 | Copies the object if it hasn't been modified since the specified time. | |
311 | ||
312 | ##### `--API-CopySourceRange=[string]` | |
313 | The range of bytes to copy from the source object. The range value must use the form bytes=first-last, where the first and last are the zero-based byte offsets to copy. For example, bytes=0-9 indicates that you want to copy the first ten bytes of the source. You can copy a range only if the source object is greater than 5 GB. | |
314 | ||
315 | ##### `--API-CopySourceSSECustomerAlgorithm=[string]` | |
316 | Specifies the algorithm to use when decrypting the source object (e.g., AES256). | |
317 | ||
318 | ##### `--API-CopySourceSSECustomerKeyMD5=[string]` | |
319 | Specifies the 128-bit MD5 digest of the encryption key according to RFC 1321. Amazon S3 uses this header for a message integrity check to ensure the encryption key was transmitted without error. Please note that this parameter is automatically populated if it is not provided. Including this parameter is not required | |
320 | ||
321 | ##### `--API-CopySourceSSECustomerKey=[string]` | |
322 | Specifies the customer-provided encryption key for Amazon S3 to use to decrypt the source object. The encryption key provided in this header must be one that was used when the source object was created. | |
323 | ||
324 | ##### `--API-ETag=[string]` | |
325 | Entity tag returned when the part was uploaded. | |
326 | ||
327 | ##### `--API-Expires=[datetime]` | |
328 | The date and time at which the object is no longer cacheable. | |
329 | ||
330 | ##### `--API-GrantFullControl=[string]` | |
331 | Gives the grantee READ, READ_ACP, and WRITE_ACP permissions on the object. | |
332 | ||
333 | ##### `--API-GrantReadACP=[string]` | |
334 | Allows grantee to read the object ACL. | |
335 | ||
336 | ##### `--API-GrantRead=[string]` | |
337 | Allows grantee to read the object data and its metadata. | |
338 | ||
339 | ##### `--API-GrantWriteACP=[string]` | |
340 | Allows grantee to write the ACL for the applicable object. | |
341 | ||
342 | ##### `--API-IfMatch=[string]` | |
343 | Return the object only if its entity tag (ETag) is the same as the one specified, otherwise return a 412 (precondition failed). | |
344 | ||
345 | ##### `--API-IfModifiedSince=[datetime]` | |
346 | Return the object only if it has been modified since the specified time, otherwise return a 304 (not modified). | |
347 | ||
348 | ##### `--API-IfNoneMatch=[string]` | |
349 | Return the object only if its entity tag (ETag) is different from the one specified, otherwise return a 304 (not modified). | |
350 | ||
351 | ##### `--API-IfUnmodifiedSince=[datetime]` | |
352 | Return the object only if it has not been modified since the specified time, otherwise return a 412 (precondition failed). | |
353 | ||
354 | ##### `--API-Metadata=[dict]` | |
355 | A map (in json string) of metadata to store with the object in S3 | |
356 | ||
357 | ##### `--API-MetadataDirective=[string]` | |
358 | Specifies whether the metadata is copied from the source object or replaced with metadata provided in the request. | |
359 | ||
360 | ##### `--API-MFA=[string]` | |
361 | The concatenation of the authentication device's serial number, a space, and the value that is displayed on your authentication device. | |
362 | ||
363 | ##### `--API-RequestPayer=[string]` | |
364 | Confirms that the requester knows that she or he will be charged for the request. Bucket owners need not specify this parameter in their requests. Documentation on downloading objects from requester pays buckets can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/ObjectsinRequesterPaysBuckets.html | |
365 | ||
366 | ##### `--API-ServerSideEncryption=[string]` | |
367 | The Server-side encryption algorithm used when storing this object in S3 (e.g., AES256, aws:kms). | |
368 | ||
369 | ##### `--API-SSECustomerAlgorithm=[string]` | |
  Specifies the algorithm to use when encrypting the object (e.g., AES256).
371 | ||
372 | ##### `--API-SSECustomerKeyMD5=[string]` | |
373 | Specifies the 128-bit MD5 digest of the encryption key according to RFC 1321. Amazon S3 uses this header for a message integrity check to ensure the encryption key was transmitted without error. Please note that this parameter is automatically populated if it is not provided. Including this parameter is not required | |
374 | ||
375 | ##### `--API-SSECustomerKey=[string]` | |
376 | Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon does not store the encryption key. The key must be appropriate for use with the algorithm specified in the x-amz-server-side-encryption-customer-algorithm header. | |
377 | ||
378 | ##### `--API-SSEKMSKeyId=[string]` | |
379 | Specifies the AWS KMS key ID to use for object encryption. All GET and PUT requests for an object protected by AWS KMS will fail if not made via SSL or using SigV4. Documentation on configuring any of the officially supported AWS SDKs and CLI can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingAWSSDK.html#specify-signature-version | |
380 | ||
381 | ##### `--API-StorageClass=[string]` | |
382 | The type of storage to use for the object. Defaults to 'STANDARD'. | |
383 | ||
384 | ##### `--API-VersionId=[string]` | |
385 | VersionId used to reference a specific version of the object. | |
386 | ||
387 | ##### `--API-WebsiteRedirectLocation=[string]` | |
388 | If the bucket is configured as a website, redirects requests for this object to another object in the same bucket or to an external URL. Amazon S3 stores the value of this header in the object metadata. | |
389 | ||
390 | ||
391 | ## Debugging Tips | |
392 | ||
Simply enable the `--debug` option to see the full log of s4cmd. If you also need to check which APIs s4cmd invokes through boto3, you can run:
394 | ||
395 | ``` | |
396 | s4cmd --debug [op] .... 2>&1 >/dev/null | grep S3APICALL | |
397 | ``` | |
398 | ||
This shows all the parameters sent to the S3 API.
400 | ||
401 | ||
402 | ## Compatibility between s3cmd and s4cmd | |
403 | ||
404 | Prefix matching: In s3cmd, unlike traditional filesystems, prefix names match listings: | |
405 | ||
406 | ``` | |
407 | >> s3cmd ls s3://my-bucket/ch | |
408 | s3://my-bucket/charlie/ | |
409 | s3://my-bucket/chyang/ | |
410 | ``` | |
411 | ||
412 | In s4cmd, behavior is the same as with a Unix shell: | |
413 | ||
414 | ``` | |
415 | >>s4cmd ls s3://my-bucket/ch | |
416 | >(empty) | |
417 | ``` | |
418 | ||
419 | To get prefix behavior, use explicit wildcards instead: s4cmd ls s3://my-bucket/ch* | |
420 | ||
421 | Similarly, sync and cp commands emulate the Unix cp command, so directory to | |
422 | directory sync use different syntax: | |
423 | ||
424 | ``` | |
425 | >> s3cmd sync s3://bucket/path/dirA s3://bucket/path/dirB/ | |
426 | ``` | |
427 | will copy contents in dirA to dirB. | |
428 | ``` | |
429 | >> s4cmd sync s3://bucket/path/dirA s3://bucket/path/dirB/ | |
430 | ``` | |
431 | will copy dirA *into* dirB. | |
432 | ||
433 | To achieve the s3cmd behavior, use wildcards: | |
434 | ``` | |
435 | s4cmd sync s3://bucket/path/dirA/* s3://bucket/path/dirB/ | |
436 | ``` | |
437 | ||
Note that s4cmd does not support rsync's convention in which dirA without a
trailing slash is treated as dirA/*.
440 | ||
No automatic overwrite for the put command:
  s4cmd put fileA s3://bucket/path/fileB will return an error if fileB exists.
  Use -f to force the overwrite, just as with the get command.
444 | ||
445 | Bugfixes for handling of non-existent paths: Often s3cmd creates empty files when specified paths do not exist: | |
446 | s3cmd get s3://my-bucket/no_such_file downloads an empty file. | |
447 | s4cmd get s3://my-bucket/no_such_file returns an error. | |
448 | s3cmd put no_such_file s3://my-bucket/ uploads an empty file. | |
449 | s4cmd put no_such_file s3://my-bucket/ returns an error. | |
450 | ||
451 | ||
452 | ## Additional technical notes | |
453 | ||
454 | Etags, MD5s and multi-part uploads: Traditionally, the etag of an object in S3 | |
455 | has been its MD5. However, this changed with the introduction of S3 multi-part | |
456 | uploads; in this case the etag is still a unique ID, but it is not the MD5 of | |
457 | the file. Amazon has not revealed the definition of the etag in this case, so | |
458 | there is no way we can calculate and compare MD5s based on the etag header in | |
459 | general. The workaround we use is to upload the MD5 as a supplemental content | |
460 | header (called "md5", instead of "etag"). This enables s4cmd to check the MD5 | |
461 | hash before upload or download. The only limitation is that this only works for | |
462 | files uploaded via s4cmd. Programs that do not understand this header will | |
463 | still have to download and verify the MD5 directly. | |
464 | ||
465 | ||
466 | ## Unimplemented features | |
467 | ||
468 | - CloudFront or other feature support beyond basic S3 access. | |
469 | ||
470 | ## Credits | |
471 | ||
472 | * Bloomreach http://www.bloomreach.com | |
473 | * Onera http://www.onera.com | |
474 | ||
475 | Platform: UNKNOWN | |
476 | Description-Content-Type: text/markdown |
0 | [[source]] | |
1 | url = "https://pypi.org/simple" | |
2 | verify_ssl = true | |
3 | name = "pypi" | |
4 | ||
5 | [packages] | |
6 | pytz = "*" | |
7 | "boto3" = "*" | |
8 | ||
9 | [dev-packages] |
0 | { | |
1 | "_meta": { | |
2 | "hash": { | |
3 | "sha256": "180ca5771c768dbbae829c344f28d99b55c16fba08e3b4372ac595b84165662b" | |
4 | }, | |
5 | "pipfile-spec": 6, | |
6 | "requires": { | |
7 | "python_version": "2.7" | |
8 | }, | |
9 | "sources": [ | |
10 | { | |
11 | "name": "pypi", | |
12 | "url": "https://pypi.org/simple", | |
13 | "verify_ssl": true | |
14 | } | |
15 | ] | |
16 | }, | |
17 | "default": { | |
18 | "boto3": { | |
19 | "hashes": [ | |
20 | "sha256:6355384911b7d018d45756281fcf8b1f4d0e5204794fe61787b9e6cd14c74518", | |
21 | "sha256:71833f2d42b2a1c3b2c5456d26ae5d95a458a36efc20eb3a0d7fff1a33e84744" | |
22 | ], | |
23 | "index": "pypi", | |
24 | "version": "==1.7.28" | |
25 | }, | |
26 | "botocore": { | |
27 | "hashes": [ | |
28 | "sha256:1227b6512a7e85247cabbc0ab8a7e28cdcf1a1a725a8f77facb729c354a27593", | |
29 | "sha256:4c452c70e87ee727640e24e9c7b2de9bc91aca3d1f02d573514fa25e92dd4c83" | |
30 | ], | |
31 | "version": "==1.10.28" | |
32 | }, | |
33 | "docutils": { | |
34 | "hashes": [ | |
35 | "sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6", | |
36 | "sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274", | |
37 | "sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6" | |
38 | ], | |
39 | "version": "==0.14" | |
40 | }, | |
41 | "futures": { | |
42 | "hashes": [ | |
43 | "sha256:9ec02aa7d674acb8618afb127e27fde7fc68994c0437ad759fa094a574adb265", | |
44 | "sha256:ec0a6cb848cc212002b9828c3e34c675e0c9ff6741dc445cab6fdd4e1085d1f1" | |
45 | ], | |
46 | "markers": "python_version == '2.6' or python_version == '2.7'", | |
47 | "version": "==3.2.0" | |
48 | }, | |
49 | "jmespath": { | |
50 | "hashes": [ | |
51 | "sha256:6a81d4c9aa62caf061cb517b4d9ad1dd300374cd4706997aff9cd6aedd61fc64", | |
52 | "sha256:f11b4461f425740a1d908e9a3f7365c3d2e569f6ca68a2ff8bc5bcd9676edd63" | |
53 | ], | |
54 | "version": "==0.9.3" | |
55 | }, | |
56 | "python-dateutil": { | |
57 | "hashes": [ | |
58 | "sha256:1adb80e7a782c12e52ef9a8182bebeb73f1d7e24e374397af06fb4956c8dc5c0", | |
59 | "sha256:e27001de32f627c22380a688bcc43ce83504a7bc5da472209b4c70f02829f0b8" | |
60 | ], | |
61 | "markers": "python_version >= '2.7'", | |
62 | "version": "==2.7.3" | |
63 | }, | |
64 | "pytz": { | |
65 | "hashes": [ | |
66 | "sha256:65ae0c8101309c45772196b21b74c46b2e5d11b6275c45d251b150d5da334555", | |
67 | "sha256:c06425302f2cf668f1bba7a0a03f3c1d34d4ebeef2c72003da308b3947c7f749" | |
68 | ], | |
69 | "index": "pypi", | |
70 | "version": "==2018.4" | |
71 | }, | |
72 | "s3transfer": { | |
73 | "hashes": [ | |
74 | "sha256:90dc18e028989c609146e241ea153250be451e05ecc0c2832565231dacdf59c1", | |
75 | "sha256:c7a9ec356982d5e9ab2d4b46391a7d6a950e2b04c472419f5fdec70cc0ada72f" | |
76 | ], | |
77 | "version": "==0.1.13" | |
78 | }, | |
79 | "six": { | |
80 | "hashes": [ | |
81 | "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", | |
82 | "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb" | |
83 | ], | |
84 | "version": "==1.11.0" | |
85 | } | |
86 | }, | |
87 | "develop": {} | |
88 | } |
5 | 5 | |
6 | 6 | **Author**: Chou-han Yang ([@chouhanyang](https://github.com/chouhanyang)) |
7 | 7 | |
8 | **Current Maintainers**: Naveen Vardhi ([@rozuur](https://github.com/rozuur)) | Navin Pai ([@navinpai](https://github.com/navinpai)) | |
8 | **Current Maintainers**: Debodirno Chandra ([@debodirno](https://github.com/debodirno)) | Naveen Vardhi ([@rozuur](https://github.com/rozuur)) | Navin Pai ([@navinpai](https://github.com/navinpai)) | |
9 | 9 | |
10 | 10 | ---- |
11 | 11 | |
20 | 20 | - Faster upload with lazy evaluation of md5 hash. |
21 | 21 | - Listing large number of files with S3 pagination, with memory is the limit. |
22 | 22 | - New directory to directory `dsync` command is better and standalone implementation to replace old `sync` command, which is implemented based on top of get/put/mv commands. `--delete-removed` work for all cases including local to s3, s3 to local, and s3 to s3. `sync` command preserves the old behavior in this version for compatibility. |
23 | - Tested on both python 2 and 3. | |
23 | - [Support for S3 compatible storage services](https://github.com/bloomreach/s4cmd/issues/52) such as DreamHost and Cloudian using `--endpoint-url` (Community Supported Beta Feature). | |
24 | - Tested on both Python 2.7, 3.6, 3.7, 3.8, 3.9 and nightly. | |
24 | 25 | - Special thanks to [onera.com](http://www.onera.com) for supporting s4cmd. |
25 | 26 | |
26 | 27 |
# Bash programmable completion for s4cmd: completes the sub-command,
# per-command options, and (by querying `s4cmd ls`) bucket and key names.
_s4cmd ()
{
    local IFS=$' \n'
    local cur prev words cword possibleparams
    command=${COMP_WORDS[1]}
    prev=${COMP_WORDS[COMP_CWORD-1]}
    prevprev=${COMP_WORDS[COMP_CWORD-2]}
    _init_completion || return

    if [[ $cword -eq 1 ]]; then
        # First word: complete the s4cmd sub-command.
        COMPREPLY=($( compgen -W 'ls put get cp mv sync del du' -- "$cur" ))
        COMPREPLY=("${COMPREPLY[@]/%/ }")
    elif [[ $cword -ge 2 ]]; then
        if [ "$prevprev" == "s3" ]
        then
            # BUG FIX: the original used `compgen -W "$buckets"`, which expands
            # to the FIRST array element only, so at most one bucket/key was
            # ever offered. "${buckets[*]}" hands compgen the whole word list.

            # Just "s3://": list bucket names.
            if [[ $cur =~ ^\/\/$ ]]; then
                buckets=($(s4cmd ls 2>/dev/null | grep -Eo "s3://.*" | grep -Eo "//.*"))
                COMPREPLY=($(compgen -W "${buckets[*]}" -- "$cur"))
                return 0

            # Partial bucket name: complete against all bucket names.
            elif [[ $cur =~ ^\/\/[^\/]*$ ]]; then
                buckets=($(s4cmd ls 2>/dev/null | grep -Eo "s3://.*" | grep -Eo "//.*"))
                COMPREPLY=($(compgen -W "${buckets[*]}" -- "$cur"))
                return 0

            # Path ending in "/": list that directory's contents.
            elif [[ $cur =~ ^\/\/.*\/$ ]]; then
                buckets=($(s4cmd ls s3:$cur 2>/dev/null | grep -Eo "s3://.*" | grep -Eo "//.*"))
                COMPREPLY=($(compgen -W "${buckets[*]}" -- "$cur"))
                return 0

            # Partial key: complete against the parent directory's listing.
            elif [[ $cur =~ ^\/\/.*\/.*$ ]]; then
                checkdir=($(dirname "$cur"))
                buckets=($(s4cmd ls s3:$checkdir 2>/dev/null | grep -Eo "s3://.*" | grep -Eo "//.*"))
                COMPREPLY=($(compgen -W "${buckets[*]}" -- "$cur"))
                return 0
            fi
        fi
        # Not an s3 path: offer the options valid for the chosen sub-command.
        case "$command" in
            ls)
                COMPREPLY=($(compgen -W "--recursive --show-directory" -- "$cur"))
                COMPREPLY=("${COMPREPLY[@]/%/ }")
                return 0
                ;;
            put|get|sync|cp|mv)
                COMPREPLY=($(compgen -W "--recursive --sync-check --force --dry-run" -- "$cur"))
                COMPREPLY=("${COMPREPLY[@]/%/ }")
                return 0
                ;;
            del)
                COMPREPLY=($(compgen -W "--recursive --dry-run" -- "$cur"))
                COMPREPLY=("${COMPREPLY[@]/%/ }")
                return 0
                ;;
            du)
                COMPREPLY=($(compgen -W "--recursive" -- "$cur"))
                COMPREPLY=("${COMPREPLY[@]/%/ }")
                return 0
                ;;
        esac
    fi
} &&
complete -o nospace -o default -F _s4cmd s4cmd
68 |
0 | #!/bin/bash | |
1 | ||
2 | # | |
3 | # Copyright 2012-2018 BloomReach, Inc. | |
4 | # | |
5 | # Licensed under the Apache License, Version 2.0 (the "License"); | |
6 | # you may not use this file except in compliance with the License. | |
7 | # You may obtain a copy of the License at | |
8 | # | |
9 | # http://www.apache.org/licenses/LICENSE-2.0 | |
10 | # | |
11 | # Unless required by applicable law or agreed to in writing, software | |
12 | # distributed under the License is distributed on an "AS IS" BASIS, | |
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
14 | # See the License for the specific language governing permissions and | |
15 | # limitations under the License. | |
16 | # | |
17 | ||
18 | # | |
19 | # Tests for s4cmd command line tool | |
20 | # | |
21 | ||
22 | # Python settings, can be python 2 or 3 | |
# Interpreter used to run s4cmd.py; override with PYTHON=python3 ./runtests.sh.
PYTHON=${PYTHON:-python}
PYTHONPATH=${PYTHONPATH:-$(pwd)}
# CI build identifier; keeps concurrent test runs in separate remote prefixes.
BUILD_ID=${BUILD_ID:-0}
# Scratch directory for generated local fixtures and per-case outputs.
LOCALDIR=./test-tmp
REMOTEDIR=${REMOTEDIR:-"s3://bucket/path"}
# Namespace the remote prefix by build id and Python version (e.g. .../0/3.6.9)
# so runs on different interpreters don't collide.
REMOTEDIR="${REMOTEDIR}/${BUILD_ID}/$(${PYTHON} --version 2>&1 | cut -d' ' -f 2)"
S4CMD="${PYTHON} $(pwd)/s4cmd.py"
S4CMD_OPTS=${S4CMD_OPTS:-"--debug"}
# Base block size for the random fixture files (each file is 2 blocks).
FILESIZE=1M
# Flipped to true by any failing case; checked by the harness at exit.
TEST_FAILED=false
33 | ||
# Build a deterministic local test tree under $LOCALDIR, clear the remote
# test prefix, and record the expected directory layout in source.tree.
# Refactor: the original repeated the same mkdir/pushd/dd/popd stanza twelve
# times (for a/a..a3, b/b..b3, c/c..c3); the nested loops below create the
# identical files in the identical order.
function initialize {
  # Create testing data locally
  rm -rf $LOCALDIR
  mkdir $LOCALDIR
  pushd $LOCALDIR

  mkdir source
  pushd source
  # Top-level fixtures with varied permissions, plus one empty file (011)
  # to exercise zero-length transfers.
  dd if=/dev/urandom of=001 bs=$FILESIZE count=2
  dd if=/dev/urandom of=010 bs=$FILESIZE count=2
  dd if=/dev/urandom of=101 bs=$FILESIZE count=2
  touch 011
  chmod 700 001
  chmod 770 010
  chmod 707 101

  # Three parallel subtrees (a, b, c); each holds four directories
  # (x, x1, x2, x3) of three random files named <dir>-001/<dir>-010/<dir>-101.
  local top sub suffix
  for top in a b c; do
    mkdir $top
    pushd $top
    for sub in $top ${top}1 ${top}2 ${top}3; do
      mkdir $sub
      pushd $sub
      for suffix in 001 010 101; do
        dd if=/dev/urandom of=$sub-$suffix bs=$FILESIZE count=2
      done
      popd
    done
    popd
  done
  popd

  # Clear target testing directory
  $S4CMD del ${S4CMD_OPTS} -r $REMOTEDIR/

  echo 'Start test cases'

  # Snapshot of the fixture layout (minus the root line) for later comparison.
  tree source | tail -n +2 > source.tree

  popd # from local-tmp
}
144 | ||
# A single file must survive a put followed by a get unchanged.
function case1-1 {
  CASE_ID="${FUNCNAME[0]}"
  echo "Test $CASE_ID: single file upload/download"

  mkdir "$CASE_ID"
  $S4CMD put ${S4CMD_OPTS} source/001 "$REMOTEDIR/$CASE_ID/001" >> "$CASE_ID.log" 2>&1
  $S4CMD get ${S4CMD_OPTS} "$REMOTEDIR/$CASE_ID/001" "$CASE_ID/001" >> "$CASE_ID.log" 2>&1

  # The round trip succeeded iff the two checksum files agree.
  md5sum source/001 | cut -f1 -d' ' >> "$CASE_ID.md5"
  md5sum "$CASE_ID/001" | cut -f1 -d' ' >> "$CASE_ID.chk"
  result=$(diff "$CASE_ID.md5" "$CASE_ID.chk")
  if [[ -n "$result" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
164 | ||
# Uploading to a key ending in "/" must store the file under that prefix.
function case1-2 {
  CASE_ID="${FUNCNAME[0]}"
  echo "Test $CASE_ID: single file upload/download (Trailing slash)"

  mkdir "$CASE_ID"
  $S4CMD put ${S4CMD_OPTS} source/001 "$REMOTEDIR/$CASE_ID/001-1/" >> "$CASE_ID.log" 2>&1
  $S4CMD get ${S4CMD_OPTS} "$REMOTEDIR/$CASE_ID/001-1/001" "$CASE_ID/001-1" >> "$CASE_ID.log" 2>&1

  # Compare checksums of the original and the downloaded copy.
  md5sum source/001 | cut -f1 -d' ' >> "$CASE_ID.md5"
  md5sum "$CASE_ID/001-1" | cut -f1 -d' ' >> "$CASE_ID.chk"
  result=$(diff "$CASE_ID.md5" "$CASE_ID.chk")
  if [[ -n "$result" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
184 | ||
# A zero-length file (source/011) must round-trip without errors.
function case1-3 {
  CASE_ID="${FUNCNAME[0]}"
  echo "Test $CASE_ID: Empty file upload/download"

  mkdir "$CASE_ID"
  $S4CMD put ${S4CMD_OPTS} source/011 "$REMOTEDIR/$CASE_ID/011" >> "$CASE_ID.log" 2>&1
  $S4CMD get ${S4CMD_OPTS} "$REMOTEDIR/$CASE_ID/011" "$CASE_ID/011" >> "$CASE_ID.log" 2>&1

  # Even an empty file has a well-defined md5; compare them.
  md5sum source/011 | cut -f1 -d' ' >> "$CASE_ID.md5"
  md5sum "$CASE_ID/011" | cut -f1 -d' ' >> "$CASE_ID.chk"
  result=$(diff "$CASE_ID.md5" "$CASE_ID.chk")
  if [[ -n "$result" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
204 | ||
# Local glob upload plus remote wildcard download must round-trip every match.
# NOTE: the glob arguments must stay unquoted so the shell (local) or s4cmd
# (remote) expands them.
function case2-1 {
  CASE_ID="${FUNCNAME[0]}"
  echo "Test $CASE_ID: wildcards upload"

  mkdir "$CASE_ID"
  $S4CMD put ${S4CMD_OPTS} source/*/?2/*-??1 "$REMOTEDIR/$CASE_ID/" >> "$CASE_ID.log" 2>&1
  $S4CMD get ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/* "$CASE_ID/" >> "$CASE_ID.log" 2>&1

  # Both globs enumerate in sorted order, so the md5 lists line up.
  md5sum source/*/?2/*-??1 | cut -f1 -d' ' >> "$CASE_ID.md5"
  md5sum $CASE_ID/* | cut -f1 -d' ' >> "$CASE_ID.chk"
  result=$(diff "$CASE_ID.md5" "$CASE_ID.chk")
  if [[ -n "$result" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
224 | ||
# Wildcard upload into a prefix WITHOUT a trailing slash, then prefix download.
function case2-2 {
  CASE_ID="${FUNCNAME[0]}"
  echo "Test $CASE_ID: wildcards upload (trailing slash)"

  mkdir "$CASE_ID"
  # Glob arguments stay unquoted so they expand.
  $S4CMD put ${S4CMD_OPTS} source/*/?2/b?-1?1 "$REMOTEDIR/$CASE_ID/a" >> "$CASE_ID.log" 2>&1
  $S4CMD get ${S4CMD_OPTS} "$REMOTEDIR/$CASE_ID/a" "$CASE_ID/" >> "$CASE_ID.log" 2>&1

  # Compare sorted checksum lists of sources and downloads.
  md5sum source/*/?2/b?-1?1 | cut -f1 -d' ' >> "$CASE_ID.md5"
  md5sum $CASE_ID/* | cut -f1 -d' ' >> "$CASE_ID.chk"
  result=$(diff "$CASE_ID.md5" "$CASE_ID.chk")
  if [[ -n "$result" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
244 | ||
# `s4cmd ls` of an uploaded prefix must list exactly as many entries as the
# local glob matched.
function case3-1 {
  CASE_ID="${FUNCNAME[0]}"
  echo "Test $CASE_ID: list files"

  $S4CMD put ${S4CMD_OPTS} source/*/?2/*-??1 "$REMOTEDIR/$CASE_ID/" >> "$CASE_ID.log" 2>&1
  $S4CMD ls ${S4CMD_OPTS} "$REMOTEDIR/$CASE_ID/" >> "$CASE_ID.out" 2> "$CASE_ID.err"

  # Count local matches vs listed remote entries (one per line).
  n1=$(ls source/*/?2/*-??1 | wc -l)
  n2=$(wc -l < "$CASE_ID.out")
  if [[ "$n1" -eq "$n2" ]]; then
    echo " - OK"
  else
    echo " - Failed"
    TEST_FAILED=true
  fi
}
262 | ||
# With -d, listing a prefix must print the directory entry itself: one line.
function case3-2 {
  CASE_ID="${FUNCNAME[0]}"
  echo "Test $CASE_ID: list files (show directory)"

  $S4CMD put ${S4CMD_OPTS} source/*/?2/*-??1 "$REMOTEDIR/$CASE_ID/" >> "$CASE_ID.log" 2>&1
  $S4CMD ls ${S4CMD_OPTS} -d "$REMOTEDIR/$CASE_ID" >> "$CASE_ID.out" 2> "$CASE_ID.err"

  # Exactly one output line means only the directory was shown.
  n1=$(wc -l < "$CASE_ID.out")
  if [[ "$n1" -eq "1" ]]; then
    echo " - OK"
  else
    echo " - Failed"
    TEST_FAILED=true
  fi
}
279 | ||
# Recursive round trip of one directory's contents (source/a/a1/).
function case4-1 {
  CASE_ID="${FUNCNAME[0]}"
  echo "Test $CASE_ID: single directory (upload and download)"

  # No mkdir here: `get -r` creates the local target directory itself.
  $S4CMD put ${S4CMD_OPTS} -r source/a/a1/ "$REMOTEDIR/$CASE_ID" >> "$CASE_ID.log" 2>&1
  $S4CMD get ${S4CMD_OPTS} -r "$REMOTEDIR/$CASE_ID/" "$CASE_ID" >> "$CASE_ID.log" 2>&1

  # Compare per-file checksums of source and downloaded trees.
  md5sum source/a/a1/* | cut -f1 -d' ' >> "$CASE_ID.md5"
  md5sum $CASE_ID/* | cut -f1 -d' ' >> "$CASE_ID.chk"
  result=$(diff "$CASE_ID.md5" "$CASE_ID.chk")
  if [[ -n "$result" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
298 | ||
# Recursive upload into a prefix ending in "/" nests the source directory
# (a1) under it; download that nested directory back and compare.
function case4-2 {
  CASE_ID="${FUNCNAME[0]}"
  echo "Test $CASE_ID: single directory (Trailing slash)"

  $S4CMD put ${S4CMD_OPTS} -r source/a/a1/ "$REMOTEDIR/$CASE_ID/" >> "$CASE_ID.log" 2>&1
  $S4CMD get ${S4CMD_OPTS} -r "$REMOTEDIR/$CASE_ID/a1/" "$CASE_ID" >> "$CASE_ID.log" 2>&1

  # Compare per-file checksums of source and downloaded trees.
  md5sum source/a/a1/* | cut -f1 -d' ' >> "$CASE_ID.md5"
  md5sum $CASE_ID/* | cut -f1 -d' ' >> "$CASE_ID.chk"
  result=$(diff "$CASE_ID.md5" "$CASE_ID.chk")
  if [[ -n "$result" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
317 | ||
# Recursive transfer of several directories selected by wildcard (a?).
function case4-3 {
  CASE_ID="${FUNCNAME[0]}"
  echo "Test $CASE_ID: single directory (Wildcards)"

  mkdir "$CASE_ID"
  # Wildcard paths stay unquoted: the local one is expanded by the shell,
  # the remote one by s4cmd.
  $S4CMD put ${S4CMD_OPTS} -r source/a/a?/ "$REMOTEDIR/$CASE_ID/" >> "$CASE_ID.log" 2>&1
  $S4CMD get ${S4CMD_OPTS} -r $REMOTEDIR/$CASE_ID/a?/ "$CASE_ID" >> "$CASE_ID.log" 2>&1

  # Two glob levels locally since the directory structure is preserved.
  md5sum source/a/a?/* | cut -f1 -d' ' >> "$CASE_ID.md5"
  md5sum $CASE_ID/*/* | cut -f1 -d' ' >> "$CASE_ID.chk"
  result=$(diff "$CASE_ID.md5" "$CASE_ID.chk")
  if [[ -n "$result" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
337 | ||
# Recursive transfer addressed by bare directory name (source/a/a), which
# is uploaded as a nested directory and downloaded back the same way.
function case4-4 {
  CASE_ID="${FUNCNAME[0]}"
  echo "Test $CASE_ID: single directory (prefix)"

  mkdir "$CASE_ID"
  $S4CMD put ${S4CMD_OPTS} -r source/a/a "$REMOTEDIR/$CASE_ID/" >> "$CASE_ID.log" 2>&1
  $S4CMD get ${S4CMD_OPTS} -r "$REMOTEDIR/$CASE_ID/a" "$CASE_ID" >> "$CASE_ID.log" 2>&1

  # The nested directory survives, hence the two-level local glob.
  md5sum source/a/a/* | cut -f1 -d' ' >> "$CASE_ID.md5"
  md5sum $CASE_ID/*/* | cut -f1 -d' ' >> "$CASE_ID.chk"
  result=$(diff "$CASE_ID.md5" "$CASE_ID.chk")
  if [[ -n "$result" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
357 | ||
function case5-1 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: trailing slash"
  #####################################################################
  # Upload without a trailing slash on either side, then fetch back.
  $S4CMD put ${S4CMD_OPTS} -r source/a/a1 $REMOTEDIR/$CASE_ID >> $CASE_ID.log 2>&1
  $S4CMD get ${S4CMD_OPTS} -r $REMOTEDIR/$CASE_ID $CASE_ID >> $CASE_ID.log 2>&1

  # Verify by comparing md5 digests of originals vs. downloaded files.
  md5sum source/a/a1/* | cut -f1 -d' ' >> $CASE_ID.md5
  md5sum $CASE_ID/* | cut -f1 -d' ' >> $CASE_ID.chk
  diff_out=$(diff $CASE_ID.md5 $CASE_ID.chk)
  if [[ -n "$diff_out" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
376 | ||
function case5-2 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: trailing slash (normalization)"
  #####################################################################
  # Mix trailing-slash styles between put and get; paths should normalize.
  $S4CMD put ${S4CMD_OPTS} -r source/a/a1/ $REMOTEDIR/$CASE_ID >> $CASE_ID.log 2>&1
  $S4CMD get ${S4CMD_OPTS} -r $REMOTEDIR/$CASE_ID $CASE_ID/ >> $CASE_ID.log 2>&1

  # Verify by comparing md5 digests of originals vs. downloaded files.
  md5sum source/a/a1/* | cut -f1 -d' ' >> $CASE_ID.md5
  md5sum $CASE_ID/* | cut -f1 -d' ' >> $CASE_ID.chk
  diff_out=$(diff $CASE_ID.md5 $CASE_ID.chk)
  if [[ -n "$diff_out" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
395 | ||
function case6-1 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: syncing up and down"
  #####################################################################
  # Sync the whole tree to S3 and back, then compare directory trees.
  $S4CMD sync ${S4CMD_OPTS} source $REMOTEDIR/$CASE_ID/ >> $CASE_ID.log 2>&1
  $S4CMD sync ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/ $CASE_ID/ >> $CASE_ID.log 2>&1
  # Drop the first line of tree output (the root path) before comparing.
  tree $CASE_ID/source | tail -n +2 >> $CASE_ID.tree

  tree_diff=$(diff source.tree $CASE_ID.tree)
  if [[ -n "$tree_diff" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
413 | ||
function case6-2 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: syncing up and down (current directory)"
  #####################################################################
  # Sync using './' as source/target; subshells keep the caller's cwd intact.
  ( cd source; $S4CMD sync ${S4CMD_OPTS} ./ $REMOTEDIR/$CASE_ID/source >> ../$CASE_ID.log 2>&1 )
  mkdir $CASE_ID
  ( cd $CASE_ID; $S4CMD sync ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/source ./ >> ../$CASE_ID.log 2>&1 )
  # Drop the first line of tree output (the root path) before comparing.
  tree $CASE_ID/source | tail -n +2 >> $CASE_ID.tree

  tree_diff=$(diff source.tree $CASE_ID.tree)
  if [[ -n "$tree_diff" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
436 | ||
function obsolete_case6-x {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: syncing up and down (--delete-removed)"
  #####################################################################
  # Seed two mirrors via local -> s3 -> s3 -> local sync chain.
  mkdir $CASE_ID-1
  mkdir $CASE_ID-2
  cp -r source/a $CASE_ID-1/
  $S4CMD sync ${S4CMD_OPTS} $CASE_ID-1/a $REMOTEDIR/$CASE_ID-1/ >> $CASE_ID.log 2>&1
  $S4CMD sync ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID-1/a $REMOTEDIR/$CASE_ID-2/ >> $CASE_ID.log 2>&1
  $S4CMD sync ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID-2/a $CASE_ID-2/ >> $CASE_ID.log 2>&1

  # Remove some files locally; --delete-removed should propagate the deletes.
  rm $CASE_ID-1/a/*/*-010

  $S4CMD sync ${S4CMD_OPTS} --delete-removed $CASE_ID-1/a $REMOTEDIR/$CASE_ID-1/ >> $CASE_ID.log 2>&1
  $S4CMD sync ${S4CMD_OPTS} --delete-removed $REMOTEDIR/$CASE_ID-1/a $REMOTEDIR/$CASE_ID-2/ >> $CASE_ID.log 2>&1
  $S4CMD sync ${S4CMD_OPTS} --delete-removed $REMOTEDIR/$CASE_ID-2/a $CASE_ID-2/ >> $CASE_ID.log 2>&1

  # Both local mirrors must end up with identical directory trees.
  tree $CASE_ID-1/a | tail -n +2 >> $CASE_ID.tree1
  tree $CASE_ID-2/a | tail -n +2 >> $CASE_ID.tree2

  tree_diff=$(diff $CASE_ID.tree1 $CASE_ID.tree2)
  if [[ -n "$tree_diff" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
466 | ||
function case6-3 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: dsyncing up and down"
  #####################################################################
  # Same as case6-1 but exercising the standalone dsync command.
  $S4CMD dsync ${S4CMD_OPTS} source $REMOTEDIR/$CASE_ID/source >> $CASE_ID.log 2>&1
  $S4CMD dsync ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/ $CASE_ID/ >> $CASE_ID.log 2>&1
  # Drop the first line of tree output (the root path) before comparing.
  tree $CASE_ID/source | tail -n +2 >> $CASE_ID.tree

  tree_diff=$(diff source.tree $CASE_ID.tree)
  if [[ -n "$tree_diff" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
484 | ||
function case6-4 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: dsyncing up and down (current directory)"
  #####################################################################
  # dsync with './' paths; subshells keep the caller's cwd intact.
  ( cd source; $S4CMD dsync ${S4CMD_OPTS} ./ $REMOTEDIR/$CASE_ID/source >> ../$CASE_ID.log 2>&1 )
  mkdir $CASE_ID
  ( cd $CASE_ID; $S4CMD dsync ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/source ./ >> ../$CASE_ID.log 2>&1 )
  # Drop the first line of tree output (the root path) before comparing.
  tree $CASE_ID/ | tail -n +2 >> $CASE_ID.tree

  tree_diff=$(diff source.tree $CASE_ID.tree)
  if [[ -n "$tree_diff" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
507 | ||
function case6-5 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: dsyncing up and down (--delete-removed)"
  #####################################################################
  # Seed two mirrors via local -> s3 -> s3 -> local dsync chain.
  mkdir $CASE_ID-1
  mkdir $CASE_ID-2
  cp -r source/a $CASE_ID-1/
  $S4CMD dsync ${S4CMD_OPTS} $CASE_ID-1/a $REMOTEDIR/$CASE_ID-1/a >> $CASE_ID.log 2>&1
  $S4CMD dsync ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID-1/a $REMOTEDIR/$CASE_ID-2/a >> $CASE_ID.log 2>&1
  $S4CMD dsync ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID-2/a $CASE_ID-2/a >> $CASE_ID.log 2>&1

  # Remove some files locally; --delete-removed should propagate the deletes.
  rm $CASE_ID-1/a/*/*-010

  $S4CMD dsync ${S4CMD_OPTS} --delete-removed $CASE_ID-1/a $REMOTEDIR/$CASE_ID-1/a >> $CASE_ID.log 2>&1
  $S4CMD dsync ${S4CMD_OPTS} --delete-removed $REMOTEDIR/$CASE_ID-1/a $REMOTEDIR/$CASE_ID-2/a >> $CASE_ID.log 2>&1
  $S4CMD dsync ${S4CMD_OPTS} --delete-removed $REMOTEDIR/$CASE_ID-2/a $CASE_ID-2/a >> $CASE_ID.log 2>&1

  # Both local mirrors must end up with identical directory trees.
  tree $CASE_ID-1/a | tail -n +2 >> $CASE_ID.tree1
  tree $CASE_ID-2/a | tail -n +2 >> $CASE_ID.tree2

  tree_diff=$(diff $CASE_ID.tree1 $CASE_ID.tree2)
  if [[ -n "$tree_diff" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
537 | ||
function case7-1 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: wildcard download"
  #####################################################################
  # Download a multi-level wildcard selection and check file contents.
  mkdir $CASE_ID
  $S4CMD sync ${S4CMD_OPTS} source $REMOTEDIR/$CASE_ID/ >> $CASE_ID.log 2>&1
  $S4CMD get ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/source/*/?2/*-??1 $CASE_ID/ >> $CASE_ID.log 2>&1

  # Verify by comparing md5 digests of originals vs. downloaded files.
  md5sum source/*/?2/*-??1 | cut -f1 -d' ' >> $CASE_ID.md5
  md5sum $CASE_ID/* | cut -f1 -d' ' >> $CASE_ID.chk
  diff_out=$(diff $CASE_ID.md5 $CASE_ID.chk)
  if [[ -n "$diff_out" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
557 | ||
function case7-2 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: wildcard download (trailing slash)"
  #####################################################################
  # Same as case7-1 but with a trailing slash on the sync source and no
  # trailing slash on the get destination.
  mkdir $CASE_ID
  $S4CMD sync ${S4CMD_OPTS} source/ $REMOTEDIR/$CASE_ID/ >> $CASE_ID.log 2>&1
  $S4CMD get ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/source/*/?2/*-??1 $CASE_ID >> $CASE_ID.log 2>&1

  # Verify by comparing md5 digests of originals vs. downloaded files.
  md5sum source/*/?2/*-??1 | cut -f1 -d' ' >> $CASE_ID.md5
  md5sum $CASE_ID/* | cut -f1 -d' ' >> $CASE_ID.chk
  diff_out=$(diff $CASE_ID.md5 $CASE_ID.chk)
  if [[ -n "$diff_out" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
577 | ||
function case8-1 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: single file copy"
  #####################################################################
  # Upload, server-side copy, then download and verify the copied object.
  mkdir $CASE_ID
  $S4CMD put ${S4CMD_OPTS} source/001 $REMOTEDIR/$CASE_ID/001_copy >> $CASE_ID.log 2>&1
  $S4CMD cp ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/001_copy $REMOTEDIR/$CASE_ID/001 >> $CASE_ID.log 2>&1
  $S4CMD get ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/001 $CASE_ID/001 >> $CASE_ID.log 2>&1

  # Verify by comparing md5 digests of original vs. downloaded file.
  md5sum source/001 | cut -f1 -d' ' >> $CASE_ID.md5
  md5sum $CASE_ID/001 | cut -f1 -d' ' >> $CASE_ID.chk
  diff_out=$(diff $CASE_ID.md5 $CASE_ID.chk)
  if [[ -n "$diff_out" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
598 | ||
function case8-2 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: recursive copy"
  #####################################################################
  # Recursive server-side copy of a whole prefix, spot-checked by wildcard get.
  mkdir $CASE_ID
  $S4CMD sync ${S4CMD_OPTS} source $REMOTEDIR/$CASE_ID-copy/ >> $CASE_ID.log 2>&1
  $S4CMD cp ${S4CMD_OPTS} -r $REMOTEDIR/$CASE_ID-copy $REMOTEDIR/$CASE_ID >> $CASE_ID.log 2>&1
  $S4CMD get ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/source/*/?2/*-??1 $CASE_ID/ >> $CASE_ID.log 2>&1

  # Verify by comparing md5 digests of originals vs. downloaded files.
  md5sum source/*/?2/*-??1 | cut -f1 -d' ' >> $CASE_ID.md5
  md5sum $CASE_ID/* | cut -f1 -d' ' >> $CASE_ID.chk
  diff_out=$(diff $CASE_ID.md5 $CASE_ID.chk)
  if [[ -n "$diff_out" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
619 | ||
function case8-3 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: wildcards copy"
  #####################################################################
  # Server-side copy driven by a multi-level wildcard selection.
  mkdir $CASE_ID
  $S4CMD sync ${S4CMD_OPTS} source $REMOTEDIR/$CASE_ID-copy/ >> $CASE_ID.log 2>&1
  $S4CMD cp ${S4CMD_OPTS} -r $REMOTEDIR/$CASE_ID-copy/source/*/?2/*-??1 $REMOTEDIR/$CASE_ID/ >> $CASE_ID.log 2>&1
  $S4CMD get ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/* $CASE_ID/ >> $CASE_ID.log 2>&1

  # Verify by comparing md5 digests of originals vs. downloaded files.
  md5sum source/*/?2/*-??1 | cut -f1 -d' ' >> $CASE_ID.md5
  md5sum $CASE_ID/* | cut -f1 -d' ' >> $CASE_ID.chk
  diff_out=$(diff $CASE_ID.md5 $CASE_ID.chk)
  if [[ -n "$diff_out" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
640 | ||
function case9-1 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: single file move"
  #####################################################################
  # Upload, server-side move, then download and verify the moved object.
  mkdir $CASE_ID
  $S4CMD put ${S4CMD_OPTS} source/001 $REMOTEDIR/$CASE_ID/001_copy >> $CASE_ID.log 2>&1
  $S4CMD mv ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/001_copy $REMOTEDIR/$CASE_ID/001 >> $CASE_ID.log 2>&1
  $S4CMD get ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/001 $CASE_ID/001 >> $CASE_ID.log 2>&1

  # Verify by comparing md5 digests of original vs. downloaded file.
  md5sum source/001 | cut -f1 -d' ' >> $CASE_ID.md5
  md5sum $CASE_ID/001 | cut -f1 -d' ' >> $CASE_ID.chk
  diff_out=$(diff $CASE_ID.md5 $CASE_ID.chk)
  if [[ -n "$diff_out" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
661 | ||
function case9-2 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: recursive move"
  #####################################################################
  # Recursive server-side move of a whole prefix, spot-checked by wildcard get.
  mkdir $CASE_ID
  $S4CMD sync ${S4CMD_OPTS} source $REMOTEDIR/$CASE_ID-copy/ >> $CASE_ID.log 2>&1
  $S4CMD mv ${S4CMD_OPTS} -r $REMOTEDIR/$CASE_ID-copy $REMOTEDIR/$CASE_ID >> $CASE_ID.log 2>&1
  $S4CMD get ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/source/*/?2/*-??1 $CASE_ID/ >> $CASE_ID.log 2>&1

  # Verify by comparing md5 digests of originals vs. downloaded files.
  md5sum source/*/?2/*-??1 | cut -f1 -d' ' >> $CASE_ID.md5
  md5sum $CASE_ID/* | cut -f1 -d' ' >> $CASE_ID.chk
  diff_out=$(diff $CASE_ID.md5 $CASE_ID.chk)
  if [[ -n "$diff_out" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
682 | ||
function case9-3 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: wildcards move"
  #####################################################################
  # Server-side move driven by a multi-level wildcard selection.
  mkdir $CASE_ID
  $S4CMD sync ${S4CMD_OPTS} source $REMOTEDIR/$CASE_ID-copy/ >> $CASE_ID.log 2>&1
  $S4CMD mv ${S4CMD_OPTS} -r $REMOTEDIR/$CASE_ID-copy/source/*/?2/*-??1 $REMOTEDIR/$CASE_ID/ >> $CASE_ID.log 2>&1
  $S4CMD get ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/* $CASE_ID/ >> $CASE_ID.log 2>&1

  # Verify by comparing md5 digests of originals vs. downloaded files.
  md5sum source/*/?2/*-??1 | cut -f1 -d' ' >> $CASE_ID.md5
  md5sum $CASE_ID/* | cut -f1 -d' ' >> $CASE_ID.chk
  diff_out=$(diff $CASE_ID.md5 $CASE_ID.chk)
  if [[ -n "$diff_out" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
703 | ||
function case10-1 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: get size with du"
  #####################################################################
  # Upload a known selection and check du reports the expected byte total.
  $S4CMD put ${S4CMD_OPTS} source/*/?2/*-??1 $REMOTEDIR/$CASE_ID/ >> $CASE_ID.log 2>&1
  $S4CMD du ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/ >> $CASE_ID.out 2> $CASE_ID.err

  # First tab-separated field of du output is the total size in bytes.
  size=$(cut -f1 < $CASE_ID.out)
  if [[ "$size" -eq "12582912" ]]; then
    echo " - OK"
  else
    echo " - Failed"
    TEST_FAILED=true
  fi
}
720 | ||
function case10-2 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: get total size (experimental)"
  #####################################################################
  # _totalsize over the same prefix twice should report double the bytes.
  $S4CMD put ${S4CMD_OPTS} source/*/?2/*-??1 $REMOTEDIR/$CASE_ID/ >> $CASE_ID.log 2>&1
  $S4CMD _totalsize ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID $REMOTEDIR/$CASE_ID >> $CASE_ID.out 2> $CASE_ID.err

  # First tab-separated field of the output is the total size in bytes.
  size=$(cut -f1 < $CASE_ID.out)
  if [[ "$size" -eq "25165824" ]]; then
    echo " - OK"
  else
    echo " - Failed"
    TEST_FAILED=true
  fi
}
737 | ||
function case11 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: large files put/mv/get (multipart upload) with permission"
  #####################################################################
  # Create a 100MB random file (forces multipart transfer with the options
  # below) and make it read-only so permission propagation is also tested.
  mkdir $CASE_ID
  large=$CASE_ID/large-source
  dd if=/dev/urandom of=$large bs=10M count=10 iflag=fullblock >> $CASE_ID.log 2>&1
  chmod 444 $large

  # Split at 5MB and cap single-part transfers at 10MB so the 100MB file
  # must go through the multipart code path in both directions.
  # (Fix: --max-singlepart-upload-size was previously listed twice.)
  MULTIPART_OPT="--multipart-split-size=5242880 --max-singlepart-upload-size=10485760 --max-singlepart-download-size=10485760"

  $S4CMD put ${S4CMD_OPTS} ${MULTIPART_OPT} $large $REMOTEDIR/$CASE_ID/large >> $CASE_ID.log 2>&1
  $S4CMD mv ${S4CMD_OPTS} ${MULTIPART_OPT} $REMOTEDIR/$CASE_ID/large $REMOTEDIR/$CASE_ID/large2 >> $CASE_ID.log 2>&1
  $S4CMD get ${S4CMD_OPTS} ${MULTIPART_OPT} $REMOTEDIR/$CASE_ID/large2 $CASE_ID/large >> $CASE_ID.log 2>&1

  # Record permission bits of source and downloaded file for comparison.
  stat -c %A $large >> $CASE_ID/large.privilege
  stat -c %A $CASE_ID/large >> $CASE_ID/large_dest.privilege

  md5sum $large | cut -f1 -d' ' >> $CASE_ID.md5
  md5sum $CASE_ID/large | cut -f1 -d' ' >> $CASE_ID.chk

  # Pass only if both content and permissions survived the round trip.
  result=$(diff $CASE_ID.md5 $CASE_ID.chk)
  result_permission=$(diff $CASE_ID/large.privilege $CASE_ID/large_dest.privilege)
  if [[ ( -z "$result" ) && ( -z "$result_permission" ) ]]; then
    echo " - OK"
  else
    echo " - Failed"
    TEST_FAILED=true
  fi
}
769 | ||
function case12 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: deletion"
  #####################################################################
  # After recursive delete, listing the prefix should produce no output.
  $S4CMD sync ${S4CMD_OPTS} source $REMOTEDIR/$CASE_ID/ >> $CASE_ID.log 2>&1
  $S4CMD del ${S4CMD_OPTS} -r $REMOTEDIR/$CASE_ID/ >> $CASE_ID.log 2>&1
  $S4CMD ls ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/ >> $CASE_ID.out 2> $CASE_ID.err

  leftover=$(cat $CASE_ID.out)
  if [[ -n "$leftover" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
787 | ||
function case13 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: Testing file permissions"
  #####################################################################
  # Sync the tree up and back down, then compare permission bits of the
  # original files against the downloaded copies.
  $S4CMD sync ${S4CMD_OPTS} source $REMOTEDIR/$CASE_ID/ >> $CASE_ID.log 2>&1
  $S4CMD sync ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID/ $CASE_ID/ >> $CASE_ID.log 2>&1
  stat -c %A source/* >> $CASE_ID/orig.privilege
  stat -c %A $CASE_ID/source/* >> $CASE_ID/dest.privilege

  result=$(diff $CASE_ID/orig.privilege $CASE_ID/dest.privilege)

  # Fix: previously checked the undefined variable $result_001, so this
  # test always reported OK regardless of the diff outcome.
  if [[ -z "$result" ]]; then
    echo " - OK"
  else
    echo " - Failed"
    TEST_FAILED=true
  fi
}
807 | ||
function case14 {
  #####################################################################
  CASE_ID=${FUNCNAME[0]}
  echo "Test $CASE_ID: Testing double slashes"
  #####################################################################
  # Paths containing '//' must round-trip the same as single-slash paths.
  mkdir $CASE_ID
  $S4CMD put ${S4CMD_OPTS} source/001 $REMOTEDIR/$CASE_ID//001 >> $CASE_ID.log 2>&1
  $S4CMD get ${S4CMD_OPTS} $REMOTEDIR/$CASE_ID//001 $CASE_ID/001 >> $CASE_ID.log 2>&1

  # Verify by comparing md5 digests of original vs. downloaded file.
  md5sum source/001 | cut -f1 -d' ' >> $CASE_ID.md5
  md5sum $CASE_ID/001 | cut -f1 -d' ' >> $CASE_ID.chk
  diff_out=$(diff $CASE_ID.md5 $CASE_ID.chk)
  if [[ -n "$diff_out" ]]; then
    echo " - Failed"
    TEST_FAILED=true
  else
    echo " - OK"
  fi
}
827 | ||
TEST_CASES=
# With no explicit case argument, discover every function in this script
# whose name matches case<digits/dashes> and run them all.
if [ "$#" -ne 1 ]; then
  echo "Running all tests"
  TEST_CASES="$(grep -o -E "function\s+case[0-9-]+" $0 | cut -f2 -d' ' | xargs)"
else
  TEST_CASES="$*"
fi

echo 'Initializing...'
initialize > /dev/null 2>&1
echo "Executing test cases with $(python --version)"
# Run every selected case from inside the scratch directory.
pushd $LOCALDIR > /dev/null
for test_case in $TEST_CASES; do
  $test_case
done
popd > /dev/null

echo "Done testing"
# Nonzero exit so CI notices any failed case.
if [[ $TEST_FAILED == true ]]; then
  exit 111
fi
0 | Metadata-Version: 2.1 | |
1 | Name: s4cmd | |
2 | Version: 2.1.0 | |
3 | Summary: Super S3 command line tool | |
4 | Home-page: https://github.com/bloomreach/s4cmd | |
5 | Author: Chou-han Yang | |
6 | License: http://www.apache.org/licenses/LICENSE-2.0 | |
7 | Description: # s4cmd | |
8 | ### Super S3 command line tool | |
9 | [![Build Status](https://travis-ci.com/bloomreach/s4cmd.svg?branch=master)](https://travis-ci.com/bloomreach/s4cmd) [![Join the chat at https://gitter.im/bloomreach/ s4cmd](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/bloomreach/s4cmd?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | |
10 | ||
11 | ---- | |
12 | ||
13 | **Author**: Chou-han Yang ([@chouhanyang](https://github.com/chouhanyang)) | |
14 | ||
15 | **Current Maintainers**: Debodirno Chandra ([@debodirno](https://github.com/debodirno)) | Naveen Vardhi ([@rozuur](https://github.com/rozuur)) | Navin Pai ([@navinpai](https://github.com/navinpai)) | |
16 | ||
17 | ---- | |
18 | ||
19 | ## What's New in s4cmd 2.x | |
20 | ||
21 | - Fully migrated from old boto 2.x to new [boto3](http://boto3.readthedocs.io/en/latest/reference/services/s3.html) library, which provides more reliable and up-to-date S3 backend. | |
22 | - Support S3 `--API-ServerSideEncryption` along with **36 new API pass-through options**. See API pass-through options section for complete list. | |
23 | - Support batch delete (with delete_objects API) to delete up to 1000 files with single call. **100+ times faster** than sequential deletion. | |
24 | - Support `S4CMD_OPTS` environment variable for commonly used options such as `--API-ServerSideEncryption` across all your s4cmd operations. | |
        - Support moving files **larger than 5GB** with multipart upload. **20+ times faster** than sequential move operation when moving large files.
26 | - Support timestamp filtering with `--last-modified-before` and `--last-modified-after` options for all operations. Human friendly timestamps are supported, e.g. `--last-modified-before='2 months ago'` | |
27 | - Faster upload with lazy evaluation of md5 hash. | |
        - Listing large numbers of files with S3 pagination, with memory as the only limit.
        - The new directory-to-directory `dsync` command is a better, standalone implementation that replaces the old `sync` command, which was implemented on top of the get/put/mv commands. `--delete-removed` works for all cases including local to s3, s3 to local, and s3 to s3. The `sync` command preserves the old behavior in this version for compatibility.
30 | - [Support for S3 compatible storage services](https://github.com/bloomreach/s4cmd/issues/52) such as DreamHost and Cloudian using `--endpoint-url` (Community Supported Beta Feature). | |
31 | - Tested on both Python 2.7, 3.6, 3.7, 3.8, 3.9 and nightly. | |
32 | - Special thanks to [onera.com](http://www.onera.com) for supporting s4cmd. | |
33 | ||
34 | ||
35 | ## Motivation | |
36 | ||
37 | S4cmd is a command-line utility for accessing | |
38 | [Amazon S3](http://en.wikipedia.org/wiki/Amazon_S3), inspired by | |
39 | [s3cmd](http://s3tools.org/s3cmd). | |
40 | ||
41 | We have used s3cmd heavily for a number of scripted, data-intensive | |
42 | applications. However as the need for a variety of small improvements arose, we | |
43 | created our own implementation, s4cmd. It is intended as an alternative to | |
44 | s3cmd for enhanced performance and for large files, and with a number of | |
45 | additional features and fixes that we have found useful. | |
46 | ||
47 | It strives to be compatible with the most common usage scenarios for s3cmd. It | |
48 | does not offer exact drop-in compatibility, due to a number of corner cases where | |
49 | different behavior seems preferable, or for bugfixes. | |
50 | ||
51 | ||
52 | ## Features | |
53 | ||
54 | S4cmd supports the regular commands you might expect for fetching and storing | |
55 | files in S3: `ls`, `put`, `get`, `cp`, `mv`, `sync`, `del`, `du`. | |
56 | ||
57 | The main features that distinguish s4cmd are: | |
58 | ||
59 | - Simple (less than 1500 lines of code) and implemented in pure Python, based | |
60 | on the widely used [Boto3](https://github.com/boto/boto3) library. | |
61 | - Multi-threaded/multi-connection implementation for enhanced performance on all | |
62 | commands. As with many network-intensive applications (like web browsers), | |
63 | accessing S3 in a single-threaded way is often significantly less efficient than | |
64 | having multiple connections actively transferring data at once. In general, we | |
65 | get a 2X boost to upload/download speeds from this. | |
66 | - Path handling: S3 is not a traditional filesystem with built-in support for | |
67 | directory structure: internally, there are only objects, not directories or | |
68 | folders. However, most people use S3 in a hierarchical structure, with paths | |
69 | separated by slashes, to emulate traditional filesystems. S4cmd follows | |
70 | conventions to more closely replicate the behavior of traditional filesystems | |
71 | in certain corner cases. For example, "ls" and "cp" work much like in Unix | |
72 | shells, to avoid odd surprises. (For examples see compatibility notes below.) | |
73 | - Wildcard support: Wildcards, including multiple levels of wildcards, like in | |
74 | Unix shells, are handled. For example: | |
75 | s3://my-bucket/my-folder/20120512/*/*chunk00?1? | |
76 | - Automatic retry: Failure tasks will be executed again after a delay. | |
77 | - Multi-part upload support for files larger than 5GB. | |
78 | - Handling of MD5s properly with respect to multi-part uploads (for the sordid | |
79 | details of this, see below). | |
80 | - Miscellaneous enhancements and bugfixes: | |
81 | - Partial file creation: Avoid creating empty target files if source does not | |
82 | exist. Avoid creating partial output files when commands are interrupted. | |
83 | - General thread safety: Tool can be interrupted or killed at any time without | |
84 | being blocked by child threads or leaving incomplete or corrupt files in | |
85 | place. | |
86 | - Ensure exit code is nonzero on all failure scenarios (a very important | |
87 | feature in scripts). | |
88 | - Expected handling of symlinks (they are followed). | |
89 | - Support both `s3://` and `s3n://` prefixes (the latter is common with | |
90 | Amazon Elastic Mapreduce). | |
91 | ||
92 | Limitations: | |
93 | ||
94 | - No CloudFront or other feature support. | |
95 | - Currently, we simulate `sync` with `get` and `put` with `--recursive --force --sync-check`. | |
96 | ||
97 | ||
98 | ## Installation and Setup | |
        You can install `s4cmd` from [PyPI](https://pypi.python.org/pypi/s4cmd).
100 | ||
101 | ``` | |
102 | pip install s4cmd | |
103 | ``` | |
104 | ||
105 | - Copy or create a symbolic link so you can run `s4cmd.py` as `s4cmd`. (It is just | |
106 | a single file!) | |
107 | - If you already have a `~/.s3cfg` file from configuring `s3cmd`, credentials | |
108 | from this file will be used. Otherwise, set the `S3_ACCESS_KEY` and | |
109 | `S3_SECRET_KEY` environment variables to contain your S3 credentials. | |
110 | - If no keys are provided, but an IAM role is associated with the EC2 instance, it will | |
111 | be used transparently. | |
112 | ||
113 | ||
114 | ## s4cmd Commands | |
115 | ||
116 | #### `s4cmd ls [path]` | |
117 | ||
118 | List all contents of a directory. | |
119 | ||
120 | * -r/--recursive: recursively display all contents including subdirectories under the given path. | |
121 | * -d/--show-directory: show the directory entry instead of its content. | |
122 | ||
123 | ||
124 | #### `s4cmd put [source] [target]` | |
125 | ||
126 | Upload local files up to S3. | |
127 | ||
128 | * -r/--recursive: also upload directories recursively. | |
129 | * -s/--sync-check: check md5 hash to avoid uploading the same content. | |
130 | * -f/--force: override existing file instead of showing error message. | |
131 | * -n/--dry-run: emulate the operation without real upload. | |
132 | ||
133 | #### `s4cmd get [source] [target]` | |
134 | ||
135 | Download files from S3 to local filesystem. | |
136 | ||
137 | * -r/--recursive: also download directories recursively. | |
138 | * -s/--sync-check: check md5 hash to avoid downloading the same content. | |
139 | * -f/--force: override existing file instead of showing error message. | |
140 | * -n/--dry-run: emulate the operation without real download. | |
141 | ||
142 | ||
143 | #### `s4cmd dsync [source dir] [target dir]` | |
144 | ||
145 | Synchronize the contents of two directories. The directory can either be local or remote, but currently, it doesn't support two local directories. | |
146 | ||
147 | * -r/--recursive: also sync directories recursively. | |
148 | * -s/--sync-check: check md5 hash to avoid syncing the same content. | |
149 | * -f/--force: override existing file instead of showing error message. | |
150 | * -n/--dry-run: emulate the operation without real sync. | |
151 | * --delete-removed: delete files not in source directory. | |
152 | ||
153 | #### `s4cmd sync [source] [target]` | |
154 | ||
        (Obsolete, use `dsync` instead) Synchronize the contents of two directories. The directory can either be local or remote, but currently, it doesn't support two local directories. This command simply invokes the get/put/mv commands.
156 | ||
157 | * -r/--recursive: also sync directories recursively. | |
158 | * -s/--sync-check: check md5 hash to avoid syncing the same content. | |
159 | * -f/--force: override existing file instead of showing error message. | |
160 | * -n/--dry-run: emulate the operation without real sync. | |
161 | * --delete-removed: delete files not in source directory. Only works when syncing local directory to s3 directory. | |
162 | ||
163 | #### `s4cmd cp [source] [target]` | |
164 | ||
165 | Copy a file or a directory from a S3 location to another. | |
166 | ||
167 | * -r/--recursive: also copy directories recursively. | |
168 | * -s/--sync-check: check md5 hash to avoid copying the same content. | |
169 | * -f/--force: override existing file instead of showing error message. | |
170 | * -n/--dry-run: emulate the operation without real copy. | |
171 | ||
172 | #### `s4cmd mv [source] [target]` | |
173 | ||
174 | Move a file or a directory from a S3 location to another. | |
175 | ||
176 | * -r/--recursive: also move directories recursively. | |
177 | * -s/--sync-check: check md5 hash to avoid moving the same content. | |
178 | * -f/--force: override existing file instead of showing error message. | |
179 | * -n/--dry-run: emulate the operation without real move. | |
180 | ||
181 | #### `s4cmd del [path]` | |
182 | ||
183 | Delete files or directories on S3. | |
184 | ||
185 | * -r/--recursive: also delete directories recursively. | |
186 | * -n/--dry-run: emulate the operation without real delete. | |
187 | ||
188 | #### `s4cmd du [path]` | |
189 | ||
190 | Get the size of the given directory. | |
191 | ||
192 | Available parameters: | |
193 | ||
194 | * -r/--recursive: also add sizes of sub-directories recursively. | |
195 | ||
196 | ## s4cmd Control Options | |
197 | ||
198 | ##### `-p S3CFG, --config=[filename]` | |
199 | path to s3cfg config file | |
200 | ||
201 | ##### `-f, --force` | |
202 | force overwrite files when download or upload | |
203 | ||
204 | ##### `-r, --recursive` | |
205 | recursively checking subdirectories | |
206 | ||
207 | ##### `-s, --sync-check` | |
208 | check file md5 before download or upload | |
209 | ||
210 | ##### `-n, --dry-run` | |
211 | trial run without actual download or upload | |
212 | ||
213 | ##### `-t RETRY, --retry=[integer]` | |
214 | number of retries before giving up | |
215 | ||
216 | ##### `--retry-delay=[integer]` | |
217 | seconds to sleep between retries | |
218 | ||
219 | ##### `-c NUM_THREADS, --num-threads=NUM_THREADS` | |
220 | number of concurrent threads | |
221 | ||
222 | ##### `--endpoint-url` | |
223 | endpoint url used in boto3 client | |
224 | ||
225 | ##### `-d, --show-directory` | |
226 | show directory instead of its content | |
227 | ||
228 | ##### `--ignore-empty-source` | |
229 | ignore empty source from s3 | |
230 | ||
231 | ##### `--use-ssl` | |
232 | (obsolete) use SSL connection to S3 | |
233 | ||
234 | ##### `--verbose` | |
235 | verbose output | |
236 | ||
237 | ##### `--debug` | |
238 | debug output | |
239 | ||
240 | ##### `--validate` | |
241 | (obsolete) validate lookup operation | |
242 | ||
243 | ##### `-D, --delete-removed` | |
244 | delete remote files that do not exist in source after sync | |
245 | ||
246 | ##### `--multipart-split-size=[integer]` | |
247 | size in bytes to split multipart transfers | |
248 | ||
249 | ##### `--max-singlepart-download-size=[integer]` | |
250 | files with size (in bytes) greater than this will be | |
251 | downloaded in multipart transfers | |
252 | ||
253 | ##### `--max-singlepart-upload-size=[integer]` | |
254 | files with size (in bytes) greater than this will be | |
255 | uploaded in multipart transfers | |
256 | ||
257 | ##### `--max-singlepart-copy-size=[integer]` | |
258 | files with size (in bytes) greater than this will be | |
259 | copied in multipart transfers | |
260 | ||
261 | ##### `--batch-delete-size=[integer]` | |
262 | Number of files (<1000) to be combined in batch delete. | |
263 | ||
264 | ##### `--last-modified-before=[datetime]` | |
265 | Condition on files where their last modified dates are | |
266 | before given parameter. | |
267 | ||
268 | ##### `--last-modified-after=[datetime]` | |
269 | Condition on files where their last modified dates are | |
270 | after given parameter. | |
271 | ||
272 | ||
273 | ## S3 API Pass-through Options | |
274 | ||
275 | Those options are directly translated to boto3 API commands. The options provided will be filtered by the APIs that are taking parameters. For example, `--API-ServerSideEncryption` is only needed for `put_object`, `create_multipart_upload` but not for `list_buckets` and `get_objects` for example. Therefore, providing `--API-ServerSideEncryption` for `s4cmd ls` has no effect. | |
276 | ||
277 | For more information, please see boto3 s3 documentations http://boto3.readthedocs.io/en/latest/reference/services/s3.html | |
278 | ||
279 | ##### `--API-ACL=[string]` | |
280 | The canned ACL to apply to the object. | |
281 | ||
282 | ##### `--API-CacheControl=[string]` | |
283 | Specifies caching behavior along the request/reply chain. | |
284 | ||
285 | ##### `--API-ContentDisposition=[string]` | |
286 | Specifies presentational information for the object. | |
287 | ||
288 | ##### `--API-ContentEncoding=[string]` | |
289 | Specifies what content encodings have been applied to the object and thus what decoding mechanisms must be applied to obtain the media-type referenced by the Content-Type header field. | |
290 | ||
291 | ##### `--API-ContentLanguage=[string]` | |
292 | The language the content is in. | |
293 | ||
294 | ##### `--API-ContentMD5=[string]` | |
295 | The base64-encoded 128-bit MD5 digest of the part data. | |
296 | ||
297 | ##### `--API-ContentType=[string]` | |
298 | A standard MIME type describing the format of the object data. | |
299 | ||
300 | ##### `--API-CopySourceIfMatch=[string]` | |
301 | Copies the object if its entity tag (ETag) matches the specified tag. | |
302 | ||
303 | ##### `--API-CopySourceIfModifiedSince=[datetime]` | |
304 | Copies the object if it has been modified since the specified time. | |
305 | ||
306 | ##### `--API-CopySourceIfNoneMatch=[string]` | |
307 | Copies the object if its entity tag (ETag) is different than the specified ETag. | |
308 | ||
309 | ##### `--API-CopySourceIfUnmodifiedSince=[datetime]` | |
310 | Copies the object if it hasn't been modified since the specified time. | |
311 | ||
312 | ##### `--API-CopySourceRange=[string]` | |
313 | The range of bytes to copy from the source object. The range value must use the form bytes=first-last, where the first and last are the zero-based byte offsets to copy. For example, bytes=0-9 indicates that you want to copy the first ten bytes of the source. You can copy a range only if the source object is greater than 5 GB. | |
314 | ||
315 | ##### `--API-CopySourceSSECustomerAlgorithm=[string]` | |
316 | Specifies the algorithm to use when decrypting the source object (e.g., AES256). | |
317 | ||
318 | ##### `--API-CopySourceSSECustomerKeyMD5=[string]` | |
319 | Specifies the 128-bit MD5 digest of the encryption key according to RFC 1321. Amazon S3 uses this header for a message integrity check to ensure the encryption key was transmitted without error. Please note that this parameter is automatically populated if it is not provided. Including this parameter is not required. | |
320 | ||
321 | ##### `--API-CopySourceSSECustomerKey=[string]` | |
322 | Specifies the customer-provided encryption key for Amazon S3 to use to decrypt the source object. The encryption key provided in this header must be one that was used when the source object was created. | |
323 | ||
324 | ##### `--API-ETag=[string]` | |
325 | Entity tag returned when the part was uploaded. | |
326 | ||
327 | ##### `--API-Expires=[datetime]` | |
328 | The date and time at which the object is no longer cacheable. | |
329 | ||
330 | ##### `--API-GrantFullControl=[string]` | |
331 | Gives the grantee READ, READ_ACP, and WRITE_ACP permissions on the object. | |
332 | ||
333 | ##### `--API-GrantReadACP=[string]` | |
334 | Allows grantee to read the object ACL. | |
335 | ||
336 | ##### `--API-GrantRead=[string]` | |
337 | Allows grantee to read the object data and its metadata. | |
338 | ||
339 | ##### `--API-GrantWriteACP=[string]` | |
340 | Allows grantee to write the ACL for the applicable object. | |
341 | ||
342 | ##### `--API-IfMatch=[string]` | |
343 | Return the object only if its entity tag (ETag) is the same as the one specified, otherwise return a 412 (precondition failed). | |
344 | ||
345 | ##### `--API-IfModifiedSince=[datetime]` | |
346 | Return the object only if it has been modified since the specified time, otherwise return a 304 (not modified). | |
347 | ||
348 | ##### `--API-IfNoneMatch=[string]` | |
349 | Return the object only if its entity tag (ETag) is different from the one specified, otherwise return a 304 (not modified). | |
350 | ||
351 | ##### `--API-IfUnmodifiedSince=[datetime]` | |
352 | Return the object only if it has not been modified since the specified time, otherwise return a 412 (precondition failed). | |
353 | ||
354 | ##### `--API-Metadata=[dict]` | |
355 | A map (in json string) of metadata to store with the object in S3 | |
356 | ||
357 | ##### `--API-MetadataDirective=[string]` | |
358 | Specifies whether the metadata is copied from the source object or replaced with metadata provided in the request. | |
359 | ||
360 | ##### `--API-MFA=[string]` | |
361 | The concatenation of the authentication device's serial number, a space, and the value that is displayed on your authentication device. | |
362 | ||
363 | ##### `--API-RequestPayer=[string]` | |
364 | Confirms that the requester knows that she or he will be charged for the request. Bucket owners need not specify this parameter in their requests. Documentation on downloading objects from requester pays buckets can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/ObjectsinRequesterPaysBuckets.html | |
365 | ||
366 | ##### `--API-ServerSideEncryption=[string]` | |
367 | The Server-side encryption algorithm used when storing this object in S3 (e.g., AES256, aws:kms). | |
368 | ||
369 | ##### `--API-SSECustomerAlgorithm=[string]` | |
370 | Specifies the algorithm to use when encrypting the object (e.g., AES256). | |
371 | ||
372 | ##### `--API-SSECustomerKeyMD5=[string]` | |
373 | Specifies the 128-bit MD5 digest of the encryption key according to RFC 1321. Amazon S3 uses this header for a message integrity check to ensure the encryption key was transmitted without error. Please note that this parameter is automatically populated if it is not provided. Including this parameter is not required. | |
374 | ||
375 | ##### `--API-SSECustomerKey=[string]` | |
376 | Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data. This value is used to store the object and then it is discarded; Amazon does not store the encryption key. The key must be appropriate for use with the algorithm specified in the x-amz-server-side-encryption-customer-algorithm header. | |
377 | ||
378 | ##### `--API-SSEKMSKeyId=[string]` | |
379 | Specifies the AWS KMS key ID to use for object encryption. All GET and PUT requests for an object protected by AWS KMS will fail if not made via SSL or using SigV4. Documentation on configuring any of the officially supported AWS SDKs and CLI can be found at http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingAWSSDK.html#specify-signature-version | |
380 | ||
381 | ##### `--API-StorageClass=[string]` | |
382 | The type of storage to use for the object. Defaults to 'STANDARD'. | |
383 | ||
384 | ##### `--API-VersionId=[string]` | |
385 | VersionId used to reference a specific version of the object. | |
386 | ||
387 | ##### `--API-WebsiteRedirectLocation=[string]` | |
388 | If the bucket is configured as a website, redirects requests for this object to another object in the same bucket or to an external URL. Amazon S3 stores the value of this header in the object metadata. | |
389 | ||
390 | ||
391 | ## Debugging Tips | |
392 | ||
393 | Simply enable `--debug` option to see the full log of s4cmd. If you even need to check what APIs are invoked from s4cmd to boto3, you can run: | |
394 | ||
395 | ``` | |
396 | s4cmd --debug [op] .... 2>&1 >/dev/null | grep S3APICALL | |
397 | ``` | |
398 | ||
399 | This shows all the parameters being sent to the S3 API. | |
400 | ||
401 | ||
402 | ## Compatibility between s3cmd and s4cmd | |
403 | ||
404 | Prefix matching: In s3cmd, unlike traditional filesystems, prefix names match listings: | |
405 | ||
406 | ``` | |
407 | >> s3cmd ls s3://my-bucket/ch | |
408 | s3://my-bucket/charlie/ | |
409 | s3://my-bucket/chyang/ | |
410 | ``` | |
411 | ||
412 | In s4cmd, behavior is the same as with a Unix shell: | |
413 | ||
414 | ``` | |
415 | >>s4cmd ls s3://my-bucket/ch | |
416 | >(empty) | |
417 | ``` | |
418 | ||
419 | To get prefix behavior, use explicit wildcards instead: s4cmd ls s3://my-bucket/ch* | |
420 | ||
421 | Similarly, sync and cp commands emulate the Unix cp command, so directory to | |
422 | directory sync use different syntax: | |
423 | ||
424 | ``` | |
425 | >> s3cmd sync s3://bucket/path/dirA s3://bucket/path/dirB/ | |
426 | ``` | |
427 | will copy contents in dirA to dirB. | |
428 | ``` | |
429 | >> s4cmd sync s3://bucket/path/dirA s3://bucket/path/dirB/ | |
430 | ``` | |
431 | will copy dirA *into* dirB. | |
432 | ||
433 | To achieve the s3cmd behavior, use wildcards: | |
434 | ``` | |
435 | s4cmd sync s3://bucket/path/dirA/* s3://bucket/path/dirB/ | |
436 | ``` | |
437 | ||
438 | Note that s4cmd does not treat dirA without a trailing slash as equivalent | |
439 | to dirA/*, the way rsync does. | |
440 | ||
441 | No automatic override for put command: | |
442 | s3cmd put fileA s3://bucket/path/fileB will return an error if fileB exists. | |
443 | Use -f to force the overwrite, just as with the get command. | |
444 | ||
445 | Bugfixes for handling of non-existent paths: Often s3cmd creates empty files when specified paths do not exist: | |
446 | s3cmd get s3://my-bucket/no_such_file downloads an empty file. | |
447 | s4cmd get s3://my-bucket/no_such_file returns an error. | |
448 | s3cmd put no_such_file s3://my-bucket/ uploads an empty file. | |
449 | s4cmd put no_such_file s3://my-bucket/ returns an error. | |
450 | ||
451 | ||
452 | ## Additional technical notes | |
453 | ||
454 | Etags, MD5s and multi-part uploads: Traditionally, the etag of an object in S3 | |
455 | has been its MD5. However, this changed with the introduction of S3 multi-part | |
456 | uploads; in this case the etag is still a unique ID, but it is not the MD5 of | |
457 | the file. Amazon has not revealed the definition of the etag in this case, so | |
458 | there is no way we can calculate and compare MD5s based on the etag header in | |
459 | general. The workaround we use is to upload the MD5 as a supplemental content | |
460 | header (called "md5", instead of "etag"). This enables s4cmd to check the MD5 | |
461 | hash before upload or download. The only limitation is that this only works for | |
462 | files uploaded via s4cmd. Programs that do not understand this header will | |
463 | still have to download and verify the MD5 directly. | |
464 | ||
465 | ||
466 | ## Unimplemented features | |
467 | ||
468 | - CloudFront or other feature support beyond basic S3 access. | |
469 | ||
470 | ## Credits | |
471 | ||
472 | * Bloomreach http://www.bloomreach.com | |
473 | * Onera http://www.onera.com | |
474 | ||
475 | Platform: UNKNOWN | |
476 | Description-Content-Type: text/markdown |
0 | README.md | |
1 | s4cmd.py | |
2 | setup.py | |
3 | s4cmd.egg-info/PKG-INFO | |
4 | s4cmd.egg-info/SOURCES.txt | |
5 | s4cmd.egg-info/dependency_links.txt | |
6 | s4cmd.egg-info/entry_points.txt | |
7 | s4cmd.egg-info/requires.txt | |
8 | s4cmd.egg-info/top_level.txt⏎ |
0 | s4cmd |
59 | 59 | TEMP_FILES = set() |
60 | 60 | |
61 | 61 | # Environment variable names for S3 credentials. |
62 | S3_ACCESS_KEY_NAME = "S3_ACCESS_KEY" | |
63 | S3_SECRET_KEY_NAME = "S3_SECRET_KEY" | |
62 | S3_ACCESS_KEY_NAME = "AWS_ACCESS_KEY_ID" | |
63 | S3_SECRET_KEY_NAME = "AWS_SECRET_ACCESS_KEY" | |
64 | 64 | S4CMD_ENV_KEY = "S4CMD_OPTS" |
65 | 65 | |
66 | 66 | |
654 | 654 | config = ConfigParser.ConfigParser() |
655 | 655 | config.read(s3cfg_path) |
656 | 656 | keys = config.get("default", "access_key"), config.get("default", "secret_key") |
657 | debug("read S3 keys from $HOME/.s3cfg file") | |
657 | debug("read S3 keys from %s file", s3cfg_path) | |
658 | 658 | return keys |
659 | 659 | except Exception as e: |
660 | 660 | info("could not read S3 keys from %s file; skipping (%s)", s3cfg_path, e) |
1157 | 1157 | fi = filter_path.split(PATH_SEP) |
1158 | 1158 | |
1159 | 1159 | # Here, if we are in recursive mode, we allow the pi to be longer than fi. |
1160 | # Otherwise, length of pi should be equal or less than the lenght of fi. | |
1160 | # Otherwise, length of pi should be equal or less than the length of fi. | |
1161 | 1161 | min_len = min(len(pi), len(fi)) |
1162 | 1162 | matched = fnmatch.fnmatch(PATH_SEP.join(pi[0:min_len]), PATH_SEP.join(fi[0:min_len])) |
1163 | 1163 | return matched and (self.opt.recursive or len(pi) <= len(fi)) |
0 | # Thanks baboon project for the code. | |
1 | import subprocess | |
2 | ||
3 | ||
def cmp_to_key(mycmp):
    """Convert an old-style ``cmp=`` comparison function into a ``key=`` class.

    Backport of the ``functools.cmp_to_key`` recipe for interpreters that
    lack it. Calling the returned class on an element produces a wrapper
    object; every rich comparison between two wrappers delegates to
    ``mycmp(a, b)`` and interprets the sign of its result (negative: a < b,
    zero: equal, positive: a > b). Wrappers are deliberately unhashable.
    """

    class _KeyWrapper(object):
        # Wrapper object whose ordering is fully determined by mycmp.
        def __init__(self, obj, *args):
            # Extra positional args are accepted and ignored, matching the
            # historical recipe's signature.
            self.obj = obj

        def _compare(self, other):
            # Single point of delegation to the user-supplied comparator.
            return mycmp(self.obj, other.obj)

        def __lt__(self, other):
            return self._compare(other) < 0

        def __le__(self, other):
            return self._compare(other) <= 0

        def __eq__(self, other):
            return self._compare(other) == 0

        def __ne__(self, other):
            return self._compare(other) != 0

        def __gt__(self, other):
            return self._compare(other) > 0

        def __ge__(self, other):
            return self._compare(other) >= 0

        def __hash__(self):
            # Keep wrappers unhashable; raising here mirrors the original
            # implementation's explicit TypeError.
            raise TypeError('hash not implemented')

    return _KeyWrapper