0 | 0 |
|
1 | 1 |
require 5;
|
2 | 2 |
package Sort::Naturally; # Time-stamp: "2004-12-29 18:30:03 AST"
|
3 | |
$VERSION = '1.02';
|
|
3 |
$VERSION = '1.03';
|
4 | 4 |
@EXPORT = ('nsort', 'ncmp');
|
5 | 5 |
require Exporter;
|
6 | 6 |
@ISA = ('Exporter');
|
|
54 | 54 |
} else {
|
55 | 55 |
$rv = 0;
|
56 | 56 |
}
|
57 | |
|
|
57 |
|
58 | 58 |
unless($rv) {
|
59 | 59 |
# Normal case:
|
60 | 60 |
$rv = 0;
|
61 | 61 |
DEBUG and print "<$x> and <$y> compared...\n";
|
62 | |
|
|
62 |
|
63 | 63 |
Consideration:
|
64 | 64 |
while(length $x and length $y) {
|
65 | |
|
|
65 |
|
66 | 66 |
DEBUG > 2 and print " <$x> and <$y>...\n";
|
67 | |
|
|
67 |
|
68 | 68 |
# First, non-numeric comparison:
|
69 | 69 |
$x2 = ($x =~ m/^(\D+)/s) ? length($1) : 0;
|
70 | 70 |
$y2 = ($y =~ m/^(\D+)/s) ? length($1) : 0;
|
71 | 71 |
# Now make x2 the min length of the two:
|
72 | 72 |
$x2 = $y2 if $x2 > $y2;
|
73 | 73 |
if($x2) {
|
74 | |
DEBUG > 1 and printf " <%s> and <%s> lexically for length $x2...\n",
|
|
74 |
DEBUG > 1 and printf " <%s> and <%s> lexically for length $x2...\n",
|
75 | 75 |
substr($x,0,$x2), substr($y,0,$x2);
|
76 | 76 |
do {
|
77 | 77 |
my $i = substr($x,0,$x2);
|
|
80 | 80 |
print "SCREAM! on <$i><$j> -- $sv != $rv \n" unless $rv == $sv;
|
81 | 81 |
last;
|
82 | 82 |
}
|
83 | |
|
84 | |
|
|
83 |
|
|
84 |
|
85 | 85 |
if $rv =
|
86 | |
# The ''. things here force a copy that seems to work around a
|
|
86 |
# The ''. things here force a copy that seems to work around a
|
87 | 87 |
# mysterious intermittent bug that 'use locale' provokes in
|
88 | 88 |
# many versions of Perl.
|
89 | 89 |
$cmp
|
|
99 | 99 |
substr($x,0,$x2) = '';
|
100 | 100 |
substr($y,0,$x2) = '';
|
101 | 101 |
}
|
102 | |
|
|
102 |
|
103 | 103 |
# Now numeric:
|
104 | 104 |
# (actually just using $x2 and $y2 as scratch)
|
105 | 105 |
|
|
113 | 113 |
last if $rv = $x2 <=> $1;
|
114 | 114 |
} else {
|
115 | 115 |
# ARBITRARILY large integers!
|
116 | |
|
|
116 |
|
117 | 117 |
# This saves on loss of precision that could happen
|
118 | 118 |
# with actual stringification.
|
119 | 119 |
# Also, I sense that very large numbers aren't too
|
120 | 120 |
# terribly common in sort data.
|
121 | |
|
|
121 |
|
122 | 122 |
# trim leading 0's:
|
123 | 123 |
($y2 = $1) =~ s/^0+//s;
|
124 | 124 |
$x2 =~ s/^0+//s;
|
|
137 | 137 |
# X is numeric but Y isn't
|
138 | 138 |
$rv = Y_FIRST;
|
139 | 139 |
last;
|
140 | |
}
|
|
140 |
}
|
141 | 141 |
} elsif( $y =~ s/^\d+//s ) { # we don't need to capture the substring
|
142 | 142 |
$rv = X_FIRST;
|
143 | 143 |
last;
|
|
165 | 165 |
($cmp,$lc) = @{shift @_} if @_ and ref($_[0]) eq 'ARRAY';
|
166 | 166 |
|
167 | 167 |
return @_ unless @_ > 1 or wantarray; # be clever
|
168 | |
|
|
168 |
|
169 | 169 |
my($x, $x2, $y, $y2, $rv); # scratch vars
|
170 | 170 |
|
171 | 171 |
# We use a Schwartzian xform to memoize the lc'ing and \W-removal
|
|
174 | 174 |
sort {
|
175 | 175 |
if($a->[0] eq $b->[0]) { 0 } # trap this expensive case
|
176 | 176 |
else {
|
177 | |
|
|
177 |
|
178 | 178 |
$x = $a->[1];
|
179 | 179 |
$y = $b->[1];
|
180 | 180 |
|
|
187 | 187 |
|| ($x cmp $y )
|
188 | 188 |
|| ($a->[0] cmp $b->[0])
|
189 | 189 |
;
|
190 | |
|
|
190 |
|
191 | 191 |
DEBUG > 1 and print " <${$a}[0]> cmp <${$b}[0]> is $rv ($ORD[$rv])\n";
|
192 | 192 |
$rv;
|
193 | 193 |
}}
|
|
219 | 219 |
}
|
220 | 220 |
my($a,$b) = @_;
|
221 | 221 |
my($x, $x2, $y, $y2, $rv); # scratch vars
|
222 | |
|
|
222 |
|
223 | 223 |
DEBUG > 1 and print "ncmp args <$a><$b>\n";
|
224 | 224 |
if($a eq $b) { # trap this expensive case
|
225 | 225 |
0;
|
|
228 | 228 |
$x =~ s/\W+//s;
|
229 | 229 |
$y = ($lc ? $lc->($b) : lc($b));
|
230 | 230 |
$y =~ s/\W+//s;
|
231 | |
|
|
231 |
|
232 | 232 |
~COMPARATOR~
|
233 | 233 |
|
234 | 234 |
|
|
239 | 239 |
|| ($x cmp $y)
|
240 | 240 |
|| ($a cmp $b)
|
241 | 241 |
;
|
242 | |
|
|
242 |
|
243 | 243 |
DEBUG > 1 and print " <$a> cmp <$b> is $rv\n";
|
244 | 244 |
$rv;
|
245 | 245 |
}
|
|
390 | 390 |
map { [$_, make_a_sort_key_from($_) ]
|
391 | 391 |
@_
|
392 | 392 |
;
|
393 | |
|
|
393 |
|
394 | 394 |
...you wight want something that replaces not C<sort>, but C<cmp>.
|
395 | 395 |
That's what Sort::Naturally's C<ncmp> function is for. Call it with
|
396 | 396 |
the syntax C<ncmp($left,$right)> instead of C<$left cmp $right>,
|
|
502 | 502 |
sub nsort {
|
503 | 503 |
my($cmp, $lc);
|
504 | 504 |
return @_ if @_ < 2; # Just to be CLEVER.
|
505 | |
|
|
505 |
|
506 | 506 |
my($x, $i); # scratch vars
|
507 | |
|
|
507 |
|
508 | 508 |
# And now, the GREAT BIG Schwartzian transform:
|
509 | |
|
|
509 |
|
510 | 510 |
map
|
511 | 511 |
$_->[0],
|
512 | 512 |
|
|
541 | 541 |
|
542 | 542 |
map {
|
543 | 543 |
my @bit = ($x = defined($_) ? $_ : '');
|
544 | |
|
|
544 |
|
545 | 545 |
if($x =~ m/^[+-]?(?=\d|\.\d)\d*(?:\.\d*)?(?:[Ee](?:[+-]?\d+))?\z/s) {
|
546 | 546 |
# It's entirely purely numeric, so treat it specially:
|
547 | 547 |
push @bit, '', $x;
|
|
564 | 564 |
# ['foo32.pl' => 'foo', 32, , '.pl', 0 ]
|
565 | 565 |
# ['foo325.pl' => 'foo', 325, , '.pl', 0 ]
|
566 | 566 |
# Yes, always an ODD number of elements.
|
567 | |
|
|
567 |
|
568 | 568 |
\@bit;
|
569 | 569 |
}
|
570 | 570 |
@_;
|
|
575 | 575 |
|
576 | 576 |
sub nsorts {
|
577 | 577 |
return @_ if @_ < 2; # Just to be CLEVER.
|
578 | |
|
|
578 |
|
579 | 579 |
my($x, $i); # scratch vars
|
580 | |
|
|
580 |
|
581 | 581 |
# And now, the GREAT BIG Schwartzian transform:
|
582 | |
|
|
582 |
|
583 | 583 |
map
|
584 | 584 |
$_->[0],
|
585 | 585 |
|
|
614 | 614 |
|
615 | 615 |
map {
|
616 | 616 |
my @bit = ($x = defined($_) ? $_ : '');
|
617 | |
|
|
617 |
|
618 | 618 |
while(length $x) {
|
619 | 619 |
push @bit, ($x =~ s/^(\D+)//s) ? lc($1) : '';
|
620 | 620 |
push @bit, ($x =~ s/^(\d+)//s) ? $1 : 0;
|
|
631 | 631 |
# ['foo32.pl' => 'foo', 32, , '.pl', 0 ]
|
632 | 632 |
# ['foo325.pl' => 'foo', 325, , '.pl', 0 ]
|
633 | 633 |
# Yes, always an ODD number of elements.
|
634 | |
|
|
634 |
|
635 | 635 |
\@bit;
|
636 | 636 |
}
|
637 | 637 |
@_;
|
|
642 | 642 |
|
643 | 643 |
sub nsort0 {
|
644 | 644 |
return @_ if @_ < 2; # Just to be CLEVER.
|
645 | |
|
|
645 |
|
646 | 646 |
my($x, $i); # scratch vars
|
647 | |
|
|
647 |
|
648 | 648 |
# And now, the GREAT BIG Schwartzian transform:
|
649 | |
|
|
649 |
|
650 | 650 |
map
|
651 | 651 |
$_->[0],
|
652 | 652 |
|
|
681 | 681 |
|
682 | 682 |
map {
|
683 | 683 |
my @bit = ($x = defined($_) ? $_ : '');
|
684 | |
|
|
684 |
|
685 | 685 |
if($x =~ m/^[+-]?(?=\d|\.\d)\d*(?:\.\d*)?(?:[Ee](?:[+-]?\d+))?\z/s) {
|
686 | 686 |
# It's entirely purely numeric, so treat it specially:
|
687 | 687 |
push @bit, '', $x;
|
|
702 | 702 |
}
|
703 | 703 |
}
|
704 | 704 |
DEBUG and print "$bit[0] => ", map("{$_} ", @bit), "\n";
|
705 | |
|
|
705 |
|
706 | 706 |
\@bit;
|
707 | 707 |
}
|
708 | 708 |
@_;
|
|
714 | 714 |
|
715 | 715 |
sub nsortf {
|
716 | 716 |
return @_ if @_ < 2; # Just to be CLEVER.
|
717 | |
|
|
717 |
|
718 | 718 |
my($x, $i); # scratch vars
|
719 | |
|
|
719 |
|
720 | 720 |
# And now, the GREAT BIG Schwartzian transform:
|
721 | |
|
|
721 |
|
722 | 722 |
map
|
723 | 723 |
$_->[0],
|
724 | 724 |
|
|
748 | 748 |
$x || (@$a <=> @$b ) || ($a->[1] cmp $b->[1])
|
749 | 749 |
|| ($a->[2] <=> $b->[2]) || ($a->[0] cmp $b->[0]);
|
750 | 750 |
# unless we found a result for $x in the while loop,
|
751 | |
# use length as a tiebreaker, otherwise use the
|
|
751 |
# use length as a tiebreaker, otherwise use the
|
752 | 752 |
# lc'd extension, otherwise the verison, otherwise use
|
753 | 753 |
# the original string as a fallback tiebreaker.
|
754 | 754 |
}
|
755 | 755 |
|
756 | 756 |
map {
|
757 | 757 |
my @bit = ( ($x = defined($_) ? $_ : ''), '',0 );
|
758 | |
|
|
758 |
|
759 | 759 |
{
|
760 | 760 |
# Consume the string.
|
761 | |
|
|
761 |
|
762 | 762 |
# First, pull off any VAX-style version
|
763 | 763 |
$bit[2] = $1 if $x =~ s/;(\d+)$//;
|
764 | |
|
|
764 |
|
765 | 765 |
# Then pull off any apparent extension
|
766 | 766 |
if( $x !~ m/^\.+$/s and # don't mangle ".", "..", or "..."
|
767 | 767 |
$x =~ s/(\.[^\.\;]*)$//sg
|
|
799 | 799 |
}
|
800 | 800 |
}
|
801 | 801 |
}
|
802 | |
|
|
802 |
|
803 | 803 |
DEBUG and print "$bit[0] => ", map("{$_} ", @bit), "\n";
|
804 | |
|
|
804 |
|
805 | 805 |
\@bit;
|
806 | 806 |
}
|
807 | 807 |
@_;
|