coreutils: revert the new dependency on twox-hash until it is accepted
Sylvestre Ledru
3 years ago
0 | Index: coreutils/src/uu/sort/Cargo.toml | |
1 | =================================================================== | |
2 | --- coreutils.orig/src/uu/sort/Cargo.toml | |
3 | +++ coreutils/src/uu/sort/Cargo.toml | |
4 | @@ -15,9 +15,7 @@ edition = "2018" | |
5 | path = "src/sort.rs" | |
6 | ||
7 | [dependencies] | |
8 | -rand = "0.7" | |
9 | clap = "2.33" | |
10 | -twox-hash = "1.6.0" | |
11 | itertools = "0.9" | |
12 | semver = "0.9.0" | |
13 | uucore = { version=">=0.0.8", package="uucore", path="../../uucore", features=["fs"] } | |
14 | Index: coreutils/src/uu/sort/src/sort.rs | |
15 | =================================================================== | |
16 | --- coreutils.orig/src/uu/sort/src/sort.rs | |
17 | +++ coreutils/src/uu/sort/src/sort.rs | |
18 | @@ -1,7 +1,6 @@ | |
19 | // * This file is part of the uutils coreutils package. | |
20 | // * | |
21 | // * (c) Michael Yin <mikeyin@mikeyin.org> | |
22 | -// * (c) Robert Swinford <robert.swinford..AT..gmail.com> | |
23 | // * | |
24 | // * For the full copyright and license information, please view the LICENSE | |
25 | // * file that was distributed with this source code. | |
26 | @@ -13,17 +12,13 @@ extern crate uucore; | |
27 | ||
28 | use clap::{App, Arg}; | |
29 | use itertools::Itertools; | |
30 | -use rand::distributions::Alphanumeric; | |
31 | -use rand::{thread_rng, Rng}; | |
32 | use semver::Version; | |
33 | use std::cmp::Ordering; | |
34 | use std::collections::BinaryHeap; | |
35 | use std::fs::File; | |
36 | -use std::hash::{Hash, Hasher}; | |
37 | use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Lines, Read, Write}; | |
38 | use std::mem::replace; | |
39 | use std::path::Path; | |
40 | -use twox_hash::XxHash64; | |
41 | use uucore::fs::is_stdin_interactive; // for Iterator::dedup() | |
42 | ||
43 | static NAME: &str = "sort"; | |
44 | @@ -39,18 +34,16 @@ static OPT_DICTIONARY_ORDER: &str = "dic | |
45 | static OPT_MERGE: &str = "merge"; | |
46 | static OPT_CHECK: &str = "check"; | |
47 | static OPT_IGNORE_CASE: &str = "ignore-case"; | |
48 | -static OPT_IGNORE_BLANKS: &str = "ignore-blanks"; | |
49 | static OPT_OUTPUT: &str = "output"; | |
50 | static OPT_REVERSE: &str = "reverse"; | |
51 | static OPT_STABLE: &str = "stable"; | |
52 | static OPT_UNIQUE: &str = "unique"; | |
53 | -static OPT_RANDOM: &str = "random-sort"; | |
54 | ||
55 | static ARG_FILES: &str = "files"; | |
56 | ||
57 | static DECIMAL_PT: char = '.'; | |
58 | static THOUSANDS_SEP: char = ','; | |
59 | -#[derive(Eq, Ord, PartialEq, PartialOrd)] | |
60 | + | |
61 | enum SortMode { | |
62 | Numeric, | |
63 | HumanNumeric, | |
64 | @@ -67,10 +60,8 @@ struct Settings { | |
65 | stable: bool, | |
66 | unique: bool, | |
67 | check: bool, | |
68 | - random: bool, | |
69 | compare_fns: Vec<fn(&str, &str) -> Ordering>, | |
70 | transform_fns: Vec<fn(&str) -> String>, | |
71 | - salt: String, | |
72 | } | |
73 | ||
74 | impl Default for Settings { | |
75 | @@ -83,10 +74,8 @@ impl Default for Settings { | |
76 | stable: false, | |
77 | unique: false, | |
78 | check: false, | |
79 | - random: false, | |
80 | compare_fns: Vec::new(), | |
81 | transform_fns: Vec::new(), | |
82 | - salt: String::new(), | |
83 | } | |
84 | } | |
85 | } | |
86 | @@ -166,14 +155,17 @@ impl<'a> Iterator for FileMerger<'a> { | |
87 | } | |
88 | } | |
89 | } | |
90 | - | |
91 | fn get_usage() -> String { | |
92 | format!( | |
93 | "{0} {1} | |
94 | + | |
95 | Usage: | |
96 | {0} [OPTION]... [FILE]... | |
97 | + | |
98 | Write the sorted concatenation of all FILE(s) to standard output. | |
99 | + | |
100 | Mandatory arguments for long options are mandatory for short options too. | |
101 | + | |
102 | With no FILE, or when FILE is -, read standard input.", | |
103 | NAME, VERSION | |
104 | ) | |
105 | @@ -237,12 +229,6 @@ pub fn uumain(args: impl uucore::Args) - | |
106 | .help("fold lower case to upper case characters"), | |
107 | ) | |
108 | .arg( | |
109 | - Arg::with_name(OPT_IGNORE_BLANKS) | |
110 | - .short("b") | |
111 | - .long(OPT_IGNORE_BLANKS) | |
112 | - .help("ignore leading blanks when finding sort keys in each line"), | |
113 | - ) | |
114 | - .arg( | |
115 | Arg::with_name(OPT_OUTPUT) | |
116 | .short("o") | |
117 | .long(OPT_OUTPUT) | |
118 | @@ -251,12 +237,6 @@ pub fn uumain(args: impl uucore::Args) - | |
119 | .value_name("FILENAME"), | |
120 | ) | |
121 | .arg( | |
122 | - Arg::with_name(OPT_RANDOM) | |
123 | - .short("R") | |
124 | - .long(OPT_RANDOM) | |
125 | - .help("shuffle in random order"), | |
126 | - ) | |
127 | - .arg( | |
128 | Arg::with_name(OPT_REVERSE) | |
129 | .short("r") | |
130 | .long(OPT_REVERSE) | |
131 | @@ -305,20 +285,11 @@ pub fn uumain(args: impl uucore::Args) - | |
132 | settings.transform_fns.push(|s| s.to_uppercase()); | |
133 | } | |
134 | ||
135 | - if matches.is_present(OPT_IGNORE_BLANKS) { | |
136 | - settings.transform_fns.push(|s| s.trim_start().to_string()); | |
137 | - } | |
138 | - | |
139 | settings.outfile = matches.value_of(OPT_OUTPUT).map(String::from); | |
140 | settings.reverse = matches.is_present(OPT_REVERSE); | |
141 | settings.stable = matches.is_present(OPT_STABLE); | |
142 | settings.unique = matches.is_present(OPT_UNIQUE); | |
143 | ||
144 | - if matches.is_present(OPT_RANDOM) { | |
145 | - settings.random = matches.is_present(OPT_RANDOM); | |
146 | - settings.salt = get_rand_string(); | |
147 | - } | |
148 | - | |
149 | //let mut files = matches.free; | |
150 | if files.is_empty() { | |
151 | /* if no file, default to stdin */ | |
152 | @@ -342,10 +313,10 @@ pub fn uumain(args: impl uucore::Args) - | |
153 | } | |
154 | } | |
155 | ||
156 | - exec(files, &mut settings) | |
157 | + exec(files, &settings) | |
158 | } | |
159 | ||
160 | -fn exec(files: Vec<String>, settings: &mut Settings) -> i32 { | |
161 | +fn exec(files: Vec<String>, settings: &Settings) -> i32 { | |
162 | let mut lines = Vec::new(); | |
163 | let mut file_merger = FileMerger::new(&settings); | |
164 | ||
165 | @@ -380,13 +351,6 @@ fn exec(files: Vec<String>, settings: &m | |
166 | } else { | |
167 | print_sorted(file_merger, &settings.outfile) | |
168 | } | |
169 | - } else if settings.unique && settings.mode == SortMode::Numeric { | |
170 | - print_sorted( | |
171 | - lines | |
172 | - .iter() | |
173 | - .dedup_by(|a, b| num_sort_dedup(a) == num_sort_dedup(b)), | |
174 | - &settings.outfile, | |
175 | - ) | |
176 | } else if settings.unique { | |
177 | print_sorted(lines.iter().dedup(), &settings.outfile) | |
178 | } else { | |
179 | @@ -455,11 +419,7 @@ fn compare_by(a: &str, b: &str, settings | |
180 | }; | |
181 | ||
182 | for compare_fn in &settings.compare_fns { | |
183 | - let cmp: Ordering = if settings.random { | |
184 | - random_shuffle(a, b, settings.salt.clone()) | |
185 | - } else { | |
186 | - compare_fn(a, b) | |
187 | - }; | |
188 | + let cmp = compare_fn(a, b); | |
189 | if cmp != Ordering::Equal { | |
190 | if settings.reverse { | |
191 | return cmp.reverse(); | |
192 | @@ -471,60 +431,36 @@ fn compare_by(a: &str, b: &str, settings | |
193 | Ordering::Equal | |
194 | } | |
195 | ||
196 | -fn default_compare(a: &str, b: &str) -> Ordering { | |
197 | - a.cmp(b) | |
198 | -} | |
199 | - | |
200 | -fn get_leading_number(a: &str) -> &str { | |
201 | - let mut s = ""; | |
202 | - for c in a.chars() { | |
203 | - if !c.is_numeric() && !c.eq(&'-') && !c.eq(&' ') && !c.eq(&'.') && !c.eq(&',') { | |
204 | - s = a.trim().split(c).next().unwrap(); | |
205 | - break; | |
206 | - } | |
207 | - s = a.trim(); | |
208 | - } | |
209 | - return s; | |
210 | -} | |
211 | - | |
212 | -// Matches GNU behavior, see: | |
213 | -// https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html | |
214 | -// Specifically *not* the same as sort -n | uniq | |
215 | -fn num_sort_dedup(a: &str) -> &str { | |
216 | - // Empty lines are dumped | |
217 | - if a.is_empty() { | |
218 | - return "0"; | |
219 | - // And lines that don't begin numerically are dumped | |
220 | - } else if !a.trim().chars().nth(0).unwrap_or('\0').is_numeric() { | |
221 | - return "0"; | |
222 | - } else { | |
223 | - // Prepare lines for comparison of only the numerical leading numbers | |
224 | - return get_leading_number(a); | |
225 | - }; | |
226 | -} | |
227 | - | |
228 | /// Parse the beginning string into an f64, returning -inf instead of NaN on errors. | |
229 | fn permissive_f64_parse(a: &str) -> f64 { | |
230 | + // Maybe should be split on non-digit, but then 10e100 won't parse properly. | |
231 | + // On the flip side, this will give NEG_INFINITY for "1,234", which might be OK | |
232 | + // because there's no way to handle both CSV and thousands separators without a new flag. | |
233 | + // GNU sort treats "1,234" as "1" in numeric, so maybe it's fine. | |
234 | // GNU sort treats "NaN" as non-number in numeric, so it needs special care. | |
235 | - match a.parse::<f64>() { | |
236 | - Ok(a) if a.is_nan() => std::f64::NEG_INFINITY, | |
237 | - Ok(a) => a, | |
238 | - Err(_) => std::f64::NEG_INFINITY, | |
239 | - } | |
240 | + match a.split_whitespace().next() { | |
241 | + None => std::f64::NEG_INFINITY, | |
242 | + Some(sa) => match sa.parse::<f64>() { | |
243 | + Ok(a) if a.is_nan() => std::f64::NEG_INFINITY, | |
244 | + Ok(a) => a, | |
245 | + Err(_) => std::f64::NEG_INFINITY, | |
246 | + }, | |
247 | +} | |
248 | + | |
249 | +fn default_compare(a: &str, b: &str) -> Ordering { | |
250 | + a.cmp(b) | |
251 | } | |
252 | ||
253 | -/// Compares two floats, with errors and non-numerics assumed to be -inf. | |
254 | -/// Stops coercing at the first non-numeric char. | |
255 | +/// Compares two floating point numbers, with errors being assumed to be -inf. | |
256 | +/// Stops coercing at the first whitespace char, so 1e2 will parse as 100 but | |
257 | +/// 1,000 will parse as -inf. | |
258 | fn numeric_compare(a: &str, b: &str) -> Ordering { | |
259 | #![allow(clippy::comparison_chain)] | |
260 | ||
261 | - let sa = get_leading_number(a); | |
262 | - let sb = get_leading_number(b); | |
263 | - | |
264 | - let fa = permissive_f64_parse(sa); | |
265 | - let fb = permissive_f64_parse(sb); | |
266 | - | |
267 | - // f64::cmp isn't implemented (due to NaN issues); implement directly instead | |
268 | + let fa = permissive_f64_parse(a); | |
269 | + let fb = permissive_f64_parse(b); | |
270 | + // f64::cmp isn't implemented because NaN messes with it | |
271 | + // but we sidestep that with permissive_f64_parse so just fake it | |
272 | if fa > fb { | |
273 | Ordering::Greater | |
274 | } else if fa < fb { | |
275 | @@ -535,10 +471,10 @@ fn numeric_compare(a: &str, b: &str) -> | |
276 | } | |
277 | ||
278 | fn human_numeric_convert(a: &str) -> f64 { | |
279 | - let int_str = get_leading_number(a); | |
280 | - let (_, s) = a.split_at(int_str.len()); | |
281 | - let int_part = permissive_f64_parse(int_str); | |
282 | - let suffix: f64 = match s.parse().unwrap_or('\0') { | |
283 | + let int_str: String = a.chars().take_while(|c| c.is_numeric()).collect(); | |
284 | + let suffix = a.chars().find(|c| !c.is_numeric()); | |
285 | + let int_part = int_str.parse::<f64>().unwrap_or(-1f64) as f64; | |
286 | + let suffix: f64 = match suffix.unwrap_or('\0') { | |
287 | 'K' => 1000f64, | |
288 | 'M' => 1E6, | |
289 | 'G' => 1E9, | |
290 | @@ -565,30 +501,6 @@ fn human_numeric_size_compare(a: &str, b | |
291 | } | |
292 | } | |
293 | ||
294 | -fn random_shuffle(a: &str, b: &str, salt: String) -> Ordering { | |
295 | - #![allow(clippy::comparison_chain)] | |
296 | - let salt_slice = salt.as_str(); | |
297 | - | |
298 | - let da = hash(&[a, salt_slice].concat()); | |
299 | - let db = hash(&[b, salt_slice].concat()); | |
300 | - | |
301 | - da.cmp(&db) | |
302 | -} | |
303 | - | |
304 | -fn get_rand_string() -> String { | |
305 | - thread_rng() | |
306 | - .sample_iter(&Alphanumeric) | |
307 | - .take(16) | |
308 | - .map(char::from) | |
309 | - .collect::<String>() | |
310 | -} | |
311 | - | |
312 | -fn hash<T: Hash>(t: &T) -> u64 { | |
313 | - let mut s: XxHash64 = Default::default(); | |
314 | - t.hash(&mut s); | |
315 | - s.finish() | |
316 | -} | |
317 | - | |
318 | #[derive(Eq, Ord, PartialEq, PartialOrd)] | |
319 | enum Month { | |
320 | Unknown, | |
321 | @@ -694,65 +606,3 @@ fn open(path: &str) -> Option<(Box<dyn R | |
322 | } | |
323 | } | |
324 | } | |
325 | - | |
326 | -#[cfg(test)] | |
327 | -mod tests { | |
328 | - | |
329 | - use super::*; | |
330 | - | |
331 | - #[test] | |
332 | - fn test_default_compare() { | |
333 | - let a = "your own"; | |
334 | - let b = "your place"; | |
335 | - | |
336 | - assert_eq!(Ordering::Less, default_compare(a, b)); | |
337 | - } | |
338 | - | |
339 | - #[test] | |
340 | - fn test_numeric_compare1() { | |
341 | - let a = "149:7"; | |
342 | - let b = "150:5"; | |
343 | - | |
344 | - assert_eq!(Ordering::Less, numeric_compare(a, b)); | |
345 | - } | |
346 | - | |
347 | - #[test] | |
348 | - fn test_numeric_compare2() { | |
349 | - let a = "-1.02"; | |
350 | - let b = "1"; | |
351 | - | |
352 | - assert_eq!(Ordering::Less, numeric_compare(a, b)); | |
353 | - } | |
354 | - | |
355 | - #[test] | |
356 | - fn test_human_numeric_compare() { | |
357 | - let a = "300K"; | |
358 | - let b = "1M"; | |
359 | - | |
360 | - assert_eq!(Ordering::Less, human_numeric_size_compare(a, b)); | |
361 | - } | |
362 | - | |
363 | - #[test] | |
364 | - fn test_month_compare() { | |
365 | - let a = "JaN"; | |
366 | - let b = "OCt"; | |
367 | - | |
368 | - assert_eq!(Ordering::Less, month_compare(a, b)); | |
369 | - } | |
370 | - #[test] | |
371 | - fn test_version_compare() { | |
372 | - let a = "1.2.3-alpha2"; | |
373 | - let b = "1.4.0"; | |
374 | - | |
375 | - assert_eq!(Ordering::Less, version_compare(a, b)); | |
376 | - } | |
377 | - | |
378 | - #[test] | |
379 | - fn test_random_compare() { | |
380 | - let a = "9"; | |
381 | - let b = "9"; | |
382 | - let c = get_rand_string(); | |
383 | - | |
384 | - assert_eq!(Ordering::Equal, random_shuffle(a, b, c)); | |
385 | - } | |
386 | -} | |
387 | Index: coreutils/tests/by-util/test_sort.rs | |
388 | =================================================================== | |
389 | --- coreutils.orig/tests/by-util/test_sort.rs | |
390 | +++ coreutils/tests/by-util/test_sort.rs | |
391 | @@ -2,43 +2,22 @@ use crate::common::util::*; | |
392 | ||
393 | #[test] | |
394 | fn test_numeric_floats_and_ints() { | |
395 | - for numeric_sort_param in vec!["-n", "--numeric-sort"] { | |
396 | - let input = "1.444\n8.013\n1\n-8\n1.04\n-1"; | |
397 | - new_ucmd!() | |
398 | - .arg(numeric_sort_param) | |
399 | - .pipe_in(input) | |
400 | - .succeeds() | |
401 | - .stdout_only("-8\n-1\n1\n1.04\n1.444\n8.013\n"); | |
402 | - } | |
403 | + test_helper("numeric_floats_and_ints", "-n"); | |
404 | } | |
405 | ||
406 | #[test] | |
407 | fn test_numeric_floats() { | |
408 | - for numeric_sort_param in vec!["-n", "--numeric-sort"] { | |
409 | - let input = "1.444\n8.013\n1.58590\n-8.90880\n1.040000000\n-.05"; | |
410 | - new_ucmd!() | |
411 | - .arg(numeric_sort_param) | |
412 | - .pipe_in(input) | |
413 | - .succeeds() | |
414 | - .stdout_only("-8.90880\n-.05\n1.040000000\n1.444\n1.58590\n8.013\n"); | |
415 | - } | |
416 | + test_helper("numeric_floats", "-n"); | |
417 | } | |
418 | ||
419 | #[test] | |
420 | fn test_numeric_floats_with_nan() { | |
421 | - for numeric_sort_param in vec!["-n", "--numeric-sort"] { | |
422 | - let input = "1.444\n1.0/0.0\n1.58590\n-8.90880\n1.040000000\n-.05"; | |
423 | - new_ucmd!() | |
424 | - .arg(numeric_sort_param) | |
425 | - .pipe_in(input) | |
426 | - .succeeds() | |
427 | - .stdout_only("-8.90880\n-.05\n1.0/0.0\n1.040000000\n1.444\n1.58590\n"); | |
428 | - } | |
429 | + test_helper("numeric_floats_with_nan", "-n"); | |
430 | } | |
431 | ||
432 | #[test] | |
433 | fn test_numeric_unfixed_floats() { | |
434 | - test_helper("numeric_fixed_floats", "-n"); | |
435 | + test_helper("numeric_unfixed_floats", "-n"); | |
436 | } | |
437 | ||
438 | #[test] | |
439 | @@ -53,26 +32,12 @@ fn test_numeric_unsorted_ints() { | |
440 | ||
441 | #[test] | |
442 | fn test_human_block_sizes() { | |
443 | - for human_numeric_sort_param in vec!["-h", "--human-numeric-sort"] { | |
444 | - let input = "8981K\n909991M\n-8T\n21G\n0.8M"; | |
445 | - new_ucmd!() | |
446 | - .arg(human_numeric_sort_param) | |
447 | - .pipe_in(input) | |
448 | - .succeeds() | |
449 | - .stdout_only("-8T\n0.8M\n8981K\n21G\n909991M\n"); | |
450 | - } | |
451 | + test_helper("human_block_sizes", "-h"); | |
452 | } | |
453 | ||
454 | #[test] | |
455 | fn test_month_default() { | |
456 | - for month_sort_param in vec!["-M", "--month-sort"] { | |
457 | - let input = "JAn\nMAY\n000may\nJun\nFeb"; | |
458 | - new_ucmd!() | |
459 | - .arg(month_sort_param) | |
460 | - .pipe_in(input) | |
461 | - .succeeds() | |
462 | - .stdout_only("000may\nJAn\nFeb\nMAY\nJun\n"); | |
463 | - } | |
464 | + test_helper("month_default", "-M"); | |
465 | } | |
466 | ||
467 | #[test] | |
468 | @@ -82,23 +47,12 @@ fn test_month_stable() { | |
469 | ||
470 | #[test] | |
471 | fn test_default_unsorted_ints() { | |
472 | - let input = "9\n1909888\n000\n1\n2"; | |
473 | - new_ucmd!() | |
474 | - .pipe_in(input) | |
475 | - .succeeds() | |
476 | - .stdout_only("000\n1\n1909888\n2\n9\n"); | |
477 | + test_helper("default_unsorted_ints", ""); | |
478 | } | |
479 | ||
480 | #[test] | |
481 | fn test_numeric_unique_ints() { | |
482 | - for numeric_unique_sort_param in vec!["-nu"] { | |
483 | - let input = "9\n9\n8\n1\n"; | |
484 | - new_ucmd!() | |
485 | - .arg(numeric_unique_sort_param) | |
486 | - .pipe_in(input) | |
487 | - .succeeds() | |
488 | - .stdout_only("1\n8\n9\n"); | |
489 | - } | |
490 | + test_helper("numeric_unsorted_ints_unique", "-nu"); | |
491 | } | |
492 | ||
493 | #[test] |