0 | |
|
1 | |
#
|
2 | |
# GENERATED WITH PDL::PP! Don't modify!
|
3 | |
#
|
4 | |
package PDL::VectorValued::Utils;

# Module preamble emitted by PDL::PP.  It relies on undeclared package
# variables (@EXPORT_OK, %EXPORT_TAGS, @ISA, $VERSION), so it has to stay
# ahead of the file's `use strict`.

# Names that may be imported on request.  The repeated 'PDL::PP' tokens are
# reproduced as found -- presumably a code-generator artifact (TODO confirm).
@EXPORT_OK = qw( PDL::PP vv_rlevec PDL::PP vv_rldvec PDL::PP vv_enumvec PDL::PP vv_enumvecg PDL::PP vv_rleseq PDL::PP vv_rldseq PDL::PP vv_vsearchvec PDL::PP vv_cmpvec vv_qsortvec vv_qsortveci PDL::PP vv_union PDL::PP vv_intersect PDL::PP vv_setdiff PDL::PP v_union PDL::PP v_intersect PDL::PP v_setdiff PDL::PP vv_vcos );

# The ':Func' tag exports everything listed in @EXPORT_OK.
%EXPORT_TAGS = (Func=>[@EXPORT_OK]);

use PDL::Core;
use PDL::Exporter;
use DynaLoader;

# Quoted on purpose: a bare 1.0.19 is parsed as a v-string literal, which
# stores chr(1).chr(0).chr(19) instead of the readable text "1.0.19".
$PDL::VectorValued::Utils::VERSION = '1.0.19';

@ISA = ( 'PDL::Exporter','DynaLoader' );

# Record this package in PDL::Core's list of PP modules.
push @PDL::Core::PP, __PACKAGE__;

# Load the compiled part of the module, passing the version for the
# loader's version check.
bootstrap PDL::VectorValued::Utils $VERSION;
|
19 | |
|
20 | |
|
21 | |
|
22 | |
|
23 | |
|
24 | |
use strict;
|
25 | |
|
26 | |
=pod
|
27 | |
|
28 | |
=head1 NAME
|
29 | |
|
30 | |
PDL::VectorValued::Utils - Low-level utilities for vector-valued PDLs
|
31 | |
|
32 | |
=head1 SYNOPSIS
|
33 | |
|
34 | |
use PDL;
|
35 | |
use PDL::VectorValued::Utils;
|
36 | |
|
37 | |
##---------------------------------------------------------------------
|
38 | |
## ... stuff happens
|
39 | |
|
40 | |
=cut
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
=head1 FUNCTIONS
|
49 | |
|
50 | |
|
51 | |
|
52 | |
=cut
|
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | |
|
58 | |
=pod
|
59 | |
|
60 | |
=head1 Vector-Based Run-Length Encoding and Decoding
|
61 | |
|
62 | |
=cut
|
63 | |
|
64 | |
|
65 | |
|
66 | |
|
67 | |
|
68 | |
=head2 vv_rlevec
|
69 | |
|
70 | |
=for sig
|
71 | |
|
72 | |
Signature: (c(M,N); indx [o]a(N); [o]b(M,N))
|
73 | |
|
74 | |
Run-length encode a set of vectors.
|
75 | |
|
76 | |
Higher-order rle(), for use with qsortvec().
|
77 | |
|
78 | |
Given set of vectors $c, generate a vector $a with the number of occurrences of each element
|
79 | |
(where an "element" is a vector of length $M occurring in $c),
|
80 | |
and a set of vectors $b containing the unique values.
|
81 | |
As for rle(), only the elements up to the first instance of 0 in $a should be considered.
|
82 | |
|
83 | |
Can be used together with clump() to run-length encode "values" of arbitrary dimensions.
|
84 | |
Can be used together with rotate(), cat(), append(), and qsortvec() to count N-grams
|
85 | |
over a 1d PDL.
|
86 | |
|
87 | |
See also: PDL::Slices::rle, PDL::Ufunc::qsortvec, PDL::Primitive::uniqvec
|
88 | |
|
89 | |
|
90 | |
|
91 | |
=for bad
|
92 | |
|
93 | |
vv_rlevec does not process bad values.
|
94 | |
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.
|
95 | |
|
96 | |
|
97 | |
=cut
|
98 | |
|
99 | |
|
100 | |
|
101 | |
|
102 | |
|
103 | |
|
104 | |
# Alias PDL::vv_rlevec into this package so it can be called as a plain
# function and exported via @EXPORT_OK.  The sub itself is not defined in this
# file's Perl code -- presumably it is installed by the bootstrap call
# (TODO confirm).
*vv_rlevec = \&PDL::vv_rlevec;
|
105 | |
|
106 | |
|
107 | |
|
108 | |
|
109 | |
|
110 | |
=head2 vv_rldvec
|
111 | |
|
112 | |
=for sig
|
113 | |
|
114 | |
Signature: (int a(N); b(M,N); [o]c(M,N))
|
115 | |
|
116 | |
Run-length decode a set of vectors, akin to a higher-order rld().
|
117 | |
|
118 | |
Given a vector $a() of the number of occurrences of each row, and a set $b()
|
119 | |
of row-vectors each of length $M, run-length decode to $c().
|
120 | |
|
121 | |
Can be used together with clump() to run-length decode "values" of arbitrary dimensions.
|
122 | |
|
123 | |
See also: PDL::Slices::rld.
|
124 | |
|
125 | |
|
126 | |
|
127 | |
=for bad
|
128 | |
|
129 | |
vv_rldvec does not process bad values.
|
130 | |
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.
|
131 | |
|
132 | |
|
133 | |
=cut
|
134 | |
|
135 | |
|
136 | |
|
137 | |
|
138 | |
# vv_rldvec($a, $b [, $c])
#
# Perl-level wrapper around PDL::_vv_rldvec_int.  Arguments are the count
# vector, the set of row vectors, and an optional output PDL.  If no output
# is passed, one is allocated with zeroes(): same type as the row vectors,
# first dim = row length, second dim = maximum of the summed counts, followed
# by every dim of the count vector after its first.  Returns the output PDL.
sub PDL::vv_rldvec {
    my ($counts, $vecs, $out) = @_;

    unless (defined $out) {
        # XXX Need to improve emulation of threading in auto-generating c
        my (undef, @extra_dims) = $counts->dims;
        $out = $vecs->zeroes(
            $vecs->type,
            $vecs->dim(0),
            $counts->sumover->max,
            @extra_dims,
        );
    }

    PDL::_vv_rldvec_int($counts, $vecs, $out);
    return $out;
}

# Make the method callable (and exportable) as a function of this package.
*vv_rldvec = \&PDL::vv_rldvec;
|
154 | |
|
155 | |
|
156 | |
|
157 | |
|
158 | |
|
159 | |
=head2 vv_enumvec
|
160 | |
|
161 | |
=for sig
|
162 | |
|
163 | |
Signature: (v(M,N); int [o]k(N))
|
164 | |
|
165 | |
Enumerate a list of vectors with locally unique keys.
|
166 | |
|
167 | |
Given a sorted list of vectors $v, generate a vector $k containing locally unique keys for the elements of $v
|
168 | |
(where an "element" is a vector of length $M occurring in $v).
|
169 | |
|
170 | |
Note that the keys returned in $k are only unique over a run of a single vector in $v,
|
171 | |
so that each unique vector in $v has at least one 0 (zero) index in $k associated with it.
|
172 | |
If you need global keys, see vv_enumvecg().
|
173 | |
|
174 | |
|
175 | |
|
176 | |
=for bad
|
177 | |
|
178 | |
vv_enumvec does not process bad values.
|
179 | |
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.
|
180 | |
|
181 | |
|
182 | |
=cut
|
183 | |
|
184 | |
|
185 | |
|
186 | |
|
187 | |
|
188 | |
|
189 | |
# Alias PDL::vv_enumvec into this package so it can be called as a plain
# function and exported via @EXPORT_OK.  The sub itself is not defined in this
# file's Perl code -- presumably it is installed by the bootstrap call
# (TODO confirm).
*vv_enumvec = \&PDL::vv_enumvec;
|
190 | |
|
191 | |
|
192 | |
|
193 | |
|
194 | |
|
195 | |
=head2 vv_enumvecg
|
196 | |
|
197 | |
=for sig
|
198 | |
|
199 | |
Signature: (v(M,N); int [o]k(N))
|
200 | |
|
201 | |
Enumerate a list of vectors with globally unique keys.
|
202 | |
|
203 | |
Given a sorted list of vectors $v, generate a vector $k containing globally unique keys for the elements of $v
|
204 | |
(where an "element" is a vector of length $M occurring in $v).
|
205 | |
Basically does the same thing as:
|
206 | |
|
207 | |
$k = $v->vsearchvec($v->uniqvec);
|
208 | |
|
209 | |
... but somewhat more efficiently.
|
210 | |
|
211 | |
|
212 | |
|
213 | |
=for bad
|
214 | |
|
215 | |
vv_enumvecg does not process bad values.
|
216 | |
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.
|
217 | |
|
218 | |
|
219 | |
=cut
|
220 | |
|
221 | |
|
222 | |
|
223 | |
|
224 | |
|
225 | |
|
226 | |
# Alias PDL::vv_enumvecg into this package so it can be called as a plain
# function and exported via @EXPORT_OK.  The sub itself is not defined in this
# file's Perl code -- presumably it is installed by the bootstrap call
# (TODO confirm).
*vv_enumvecg = \&PDL::vv_enumvecg;
|
227 | |
|
228 | |
|
229 | |
|
230 | |
|
231 | |
|
232 | |
=head2 vv_rleseq
|
233 | |
|
234 | |
=for sig
|
235 | |
|
236 | |
Signature: (c(N); indx [o]a(N); [o]b(N))
|
237 | |
|
238 | |
Run-length encode a vector of subsequences.
|
239 | |
|
240 | |
Given a vector $c() of concatenated variable-length, variable-offset subsequences,
|
241 | |
generate a vector $a containing the length of each subsequence
|
242 | |
and a vector $b containing the subsequence offsets.
|
243 | |
As for rle(), only the elements up to the first instance of 0 in $a should be considered.
|
244 | |
|
245 | |
See also PDL::Slices::rle.
|
246 | |
|
247 | |
|
248 | |
|
249 | |
=for bad
|
250 | |
|
251 | |
vv_rleseq does not process bad values.
|
252 | |
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.
|
253 | |
|
254 | |
|
255 | |
=cut
|
256 | |
|
257 | |
|
258 | |
|
259 | |
|
260 | |
|
261 | |
|
262 | |
# Alias PDL::vv_rleseq into this package so it can be called as a plain
# function and exported via @EXPORT_OK.  The sub itself is not defined in this
# file's Perl code -- presumably it is installed by the bootstrap call
# (TODO confirm).
*vv_rleseq = \&PDL::vv_rleseq;
|
263 | |
|
264 | |
|
265 | |
|
266 | |
|
267 | |
|
268 | |
=head2 vv_rldseq
|
269 | |
|
270 | |
=for sig
|
271 | |
|
272 | |
Signature: (int a(N); b(N); [o]c(M))
|
273 | |
|
274 | |
Run-length decode a subsequence vector.
|
275 | |
|
276 | |
Given a vector $a() of sequence lengths
|
277 | |
and a vector $b() of corresponding offsets,
|
278 | |
decode concatenation of subsequences to $c(),
|
279 | |
as for:
|
280 | |
|
281 | |
$c = null;
|
282 | |
$c = $c->append($b($_)+sequence($a->type,$a($_))) foreach (0..($N-1));
|
283 | |
|
284 | |
See also: PDL::Slices::rld.
|
285 | |
|
286 | |
|
287 | |
|
288 | |
=for bad
|
289 | |
|
290 | |
vv_rldseq does not process bad values.
|
291 | |
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.
|
292 | |
|
293 | |
|
294 | |
=cut
|
295 | |
|
296 | |
|
297 | |
|
298 | |
|
299 | |
# vv_rldseq($a, $b [, $c])
#
# Perl-level wrapper around PDL::_vv_rldseq_int.  Arguments are the vector
# of sequence lengths, the vector of offsets, and an optional output PDL.
# If no output is passed, one is allocated with zeroes(): same type as the
# offsets, first dim = maximum of the summed lengths, followed by every dim
# of the length vector after its first.  Returns the output PDL.
sub PDL::vv_rldseq {
    my ($lengths, $offsets, $out) = @_;

    unless (defined $out) {
        my (undef, @extra_dims) = $lengths->dims;
        my $total = $lengths->sumover->max;
        $out = $offsets->zeroes($offsets->type, $total, @extra_dims);
    }

    PDL::_vv_rldseq_int($lengths, $offsets, $out);
    return $out;
}

# Make the method callable (and exportable) as a function of this package.
*vv_rldseq = \&PDL::vv_rldseq;
|
313 | |
|
314 | |
|
315 | |
|
316 | |
|
317 | |
|
318 | |
=head2 vv_vsearchvec
|
319 | |
|
320 | |
=for sig
|
321 | |
|
322 | |
Signature: (find(M); which(M,N); int [o]found())
|
323 | |
|
324 | |
=for ref
|
325 | |
|
326 | |
Routine for searching N-dimensional values - akin to vsearch() for vectors.
|
327 | |
|
328 | |
=for usage
|
329 | |
|
330 | |
$found = vv_vsearchvec($find, $which);
|
331 | |
$nearest = $which->dice_axis(1,$found);
|
332 | |
|
333 | |
Returns for each row-vector in C<$find> the index along dimension N
|
334 | |
of the least row vector of C<$which>
|
335 | |
greater or equal to it.
|
336 | |
C<$which> should be sorted in increasing order.
|
337 | |
If the value of C<$find> is larger
|
338 | |
than any member of C<$which>, the index to the last element of C<$which> is
|
339 | |
returned.
|
340 | |
|
341 | |
See also: PDL::Primitive::vsearch().
|
342 | |
|
343 | |
|
344 | |
|
345 | |
=for bad
|
346 | |
|
347 | |
vv_vsearchvec does not process bad values.
|
348 | |
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.
|
349 | |
|
350 | |
|
351 | |
=cut
|
352 | |
|
353 | |
|
354 | |
|
355 | |
|
356 | |
|
357 | |
|
358 | |
# Alias PDL::vv_vsearchvec into this package so it can be called as a plain
# function and exported via @EXPORT_OK.  The sub itself is not defined in this
# file's Perl code -- presumably it is installed by the bootstrap call
# (TODO confirm).
*vv_vsearchvec = \&PDL::vv_vsearchvec;
|
359 | |
|
360 | |
|
361 | |
|
362 | |
|
363 | |
=pod
|
364 | |
|
365 | |
=head1 Vector-Valued Sorting and Comparison
|
366 | |
|
367 | |
The following functions are provided for lexicographic sorting of
|
368 | |
vectors, rsp. axis indices. As of PDL::VectorValued v1.0.12, vv_qsortvec() and
|
369 | |
vv_qsortveci() are just deprecated aliases for the builtin PDL functions of the same names.
|
370 | |
Older versions of this module used a dedicated implementation as a workaround
|
371 | |
for a bug in PDL-2.4.3, which has long since been fixed.
|
372 | |
|
373 | |
=cut
|
374 | |
|
375 | |
|
376 | |
|
377 | |
|
378 | |
|
379 | |
=head2 vv_cmpvec
|
380 | |
|
381 | |
=for sig
|
382 | |
|
383 | |
Signature: (a(N); b(N); int [o]cmp())
|
384 | |
|
385 | |
=for ref
|
386 | |
|
387 | |
Lexicographically compare a pair of vectors.
|
388 | |
|
389 | |
|
390 | |
|
391 | |
=for bad
|
392 | |
|
393 | |
vv_cmpvec does not process bad values.
|
394 | |
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.
|
395 | |
|
396 | |
|
397 | |
=cut
|
398 | |
|
399 | |
|
400 | |
|
401 | |
|
402 | |
|
403 | |
|
404 | |
# Alias PDL::vv_cmpvec into this package so it can be called as a plain
# function and exported via @EXPORT_OK.  The sub itself is not defined in this
# file's Perl code -- presumably it is installed by the bootstrap call
# (TODO confirm).
*vv_cmpvec = \&PDL::vv_cmpvec;
|
405 | |
|
406 | |
|
407 | |
|
408 | |
|
409 | |
=head2 vv_qsortvec
|
410 | |
|
411 | |
=for sig
|
412 | |
|
413 | |
Signature: (a(n,m); [o]b(n,m))
|
414 | |
|
415 | |
|
416 | |
=for ref
|
417 | |
|
418 | |
Deprecated alias for L<PDL::Ufunc::qsortvec()|PDL::Ufunc/qsortvec>,
|
419 | |
which see for details.
|
420 | |
|
421 | |
|
422 | |
=head2 vv_qsortveci
|
423 | |
|
424 | |
=for sig
|
425 | |
|
426 | |
Signature: (a(n,m); indx [o]ix(m))
|
427 | |
|
428 | |
=for ref
|
429 | |
|
430 | |
Deprecated alias for L<PDL::Ufunc::qsortveci()|PDL::Ufunc/qsortveci>,
|
431 | |
which see for details.
|
432 | |
|
433 | |
=cut
|
434 | |
|
435 | |
# Compile-time aliasing for the deprecated vv_qsortvec / vv_qsortveci names:
# each name, both in package PDL and in this package, is made an alias of the
# whole typeglob of the corresponding PDL::qsortvec / PDL::qsortveci symbol.
BEGIN {
    *PDL::vv_qsortvec  = *PDL::qsortvec;
    *vv_qsortvec       = *PDL::vv_qsortvec;

    *PDL::vv_qsortveci = *PDL::qsortveci;
    *vv_qsortveci      = *PDL::vv_qsortveci;
}
|
439 | |
|
440 | |
|
441 | |
|
442 | |
|
443 | |
=pod
|
444 | |
|
445 | |
=head1 Vector-Valued Set Operations
|
446 | |
|
447 | |
The following functions are provided for set operations on
|
448 | |
sorted vector-valued PDLs.
|
449 | |
|
450 | |
=cut
|
451 | |
|
452 | |
|
453 | |
|
454 | |
|
455 | |
|
456 | |
=head2 vv_union
|
457 | |
|
458 | |
=for sig
|
459 | |
|
460 | |
Signature: (a(M,NA); b(M,NB); [o]c(M,NC); int [o]nc())
|
461 | |
|
462 | |
|
463 | |
Union of two vector-valued PDLs. Input PDLs $a() and $b() B<MUST> be
|
464 | |
sorted in lexicographic order.
|
465 | |
On return, $nc() holds the actual number of vector-values in the union.
|
466 | |
|
467 | |
In scalar context, slices $c() to the actual number of elements in the union
|
468 | |
and returns the sliced PDL.
|
469 | |
|
470 | |
|
471 | |
|
472 | |
|
473 | |
=for bad
|
474 | |
|
475 | |
vv_union does not process bad values.
|
476 | |
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.
|
477 | |
|
478 | |
|
479 | |
=cut
|
480 | |
|
481 | |
|
482 | |
|
483 | |
|
484 | |
|
485 | |
# vv_union($a, $b [, $c [, $nc]])
#
# Perl-level wrapper around PDL::_vv_union_int.
#
# Arguments:
#   $a, $b : input PDLs, passed through unchanged.
#   $c     : optional output PDL; a null PDL is created when omitted.
#   $nc    : optional output count PDL; a null PDL is created when omitted.
#
# Returns:
#   list context   : ($c, $nc) exactly as filled in by the internal routine.
#   scalar context : $c restricted along its second dim to the first
#                    $nc->max entries.  When that count is zero, $c is
#                    reshaped to an empty second dim instead.
sub PDL::vv_union {
    my ($a,$b,$c,$nc) = @_;

    # Default each output independently.  The $c default used to be keyed on
    # $nc, which threw away a caller-supplied $c whenever $nc was omitted and
    # left $c undefined whenever only $nc was supplied.
    $c  = PDL->null if (!defined($c));
    $nc = PDL->null if (!defined($nc));

    PDL::_vv_union_int($a,$b,$c,$nc);
    return ($c,$nc) if (wantarray);

    # A zero count would produce the slice "0:-1", i.e. the whole dimension,
    # so the empty case is handled the same way as in vv_intersect/vv_setdiff.
    my $nc_max = $nc->max;
    return ($nc_max > 0
            ? $c->slice(",0:".($nc_max-1))
            : $c->reshape($c->dim(0), 0, ($c->dims)[2..($c->ndims-1)]));
}

# Make the method callable (and exportable) as a function of this package.
*vv_union = \&PDL::vv_union;
|
496 | |
|
497 | |
|
498 | |
|
499 | |
|
500 | |
|
501 | |
=head2 vv_intersect
|
502 | |
|
503 | |
=for sig
|
504 | |
|
505 | |
Signature: (a(M,NA); b(M,NB); [o]c(M,NC); int [o]nc())
|
506 | |
|
507 | |
|
508 | |
Intersection of two vector-valued PDLs.
|
509 | |
Input PDLs $a() and $b() B<MUST> be sorted in lexicographic order.
|
510 | |
On return, $nc() holds the actual number of vector-values in the intersection.
|
511 | |
|
512 | |
In scalar context, slices $c() to the actual number of elements in the intersection
|
513 | |
and returns the sliced PDL.
|
514 | |
|
515 | |
|
516 | |
|
517 | |
=for bad
|
518 | |
|
519 | |
vv_intersect does not process bad values.
|
520 | |
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.
|
521 | |
|
522 | |
|
523 | |
=cut
|
524 | |
|
525 | |
|
526 | |
|
527 | |
|
528 | |
|
529 | |
# vv_intersect($a, $b [, $c [, $nc]])
#
# Perl-level wrapper around PDL::_vv_intersect_int.  Missing outputs $c and
# $nc are created as null PDLs before the call.
#
# List context returns ($c, $nc).  Scalar context returns $c cut down along
# its second dim to the first $nc->max entries, or reshaped to an empty
# second dim when that count is not positive.
sub PDL::vv_intersect {
    my ($lhs, $rhs, $out, $count) = @_;
    $out   = PDL->null unless defined $out;
    $count = PDL->null unless defined $count;

    PDL::_vv_intersect_int($lhs, $rhs, $out, $count);
    return ($out, $count) if wantarray;

    my $n_found = $count->max;
    if ($n_found > 0) {
        return $out->slice(",0:" . ($n_found - 1));
    }

    # Nothing found: keep dim 0, empty dim 1, and carry over any further dims.
    return $out->reshape($out->dim(0), 0, ($out->dims)[2 .. ($out->ndims - 1)]);
}

# Make the method callable (and exportable) as a function of this package.
*vv_intersect = \&PDL::vv_intersect;
|
543 | |
|
544 | |
|
545 | |
|
546 | |
|
547 | |
|
548 | |
=head2 vv_setdiff
|
549 | |
|
550 | |
=for sig
|
551 | |
|
552 | |
Signature: (a(M,NA); b(M,NB); [o]c(M,NC); int [o]nc())
|
553 | |
|
554 | |
|
555 | |
Set-difference ($a() \ $b()) of two vector-valued PDLs.
|
556 | |
Input PDLs $a() and $b() B<MUST> be sorted in lexicographic order.
|
557 | |
On return, $nc() holds the actual number of vector-values in the computed vector set.
|
558 | |
|
559 | |
In scalar context, slices $c() to the actual number of elements in the output vector set
|
560 | |
and returns the sliced PDL.
|
561 | |
|
562 | |
|
563 | |
|
564 | |
=for bad
|
565 | |
|
566 | |
vv_setdiff does not process bad values.
|
567 | |
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.
|
568 | |
|
569 | |
|
570 | |
=cut
|
571 | |
|
572 | |
|
573 | |
|
574 | |
|
575 | |
|
576 | |
# vv_setdiff($a, $b [, $c [, $nc]])
#
# Perl-level wrapper around PDL::_vv_setdiff_int.  Missing outputs $c and
# $nc are created as null PDLs before the call.
#
# List context returns ($c, $nc).  Scalar context returns $c cut down along
# its second dim to the first $nc->max entries, or reshaped to an empty
# second dim when that count is not positive.
sub PDL::vv_setdiff {
    my ($lhs, $rhs, $out, $count) = @_;
    $out   = PDL->null unless defined $out;
    $count = PDL->null unless defined $count;

    PDL::_vv_setdiff_int($lhs, $rhs, $out, $count);
    return ($out, $count) if wantarray;

    my $n_left = $count->max;
    if ($n_left > 0) {
        return $out->slice(",0:" . ($n_left - 1));
    }

    # Empty result: keep dim 0, empty dim 1, and carry over any further dims.
    return $out->reshape($out->dim(0), 0, ($out->dims)[2 .. ($out->ndims - 1)]);
}

# Make the method callable (and exportable) as a function of this package.
*vv_setdiff = \&PDL::vv_setdiff;
|
590 | |
|
591 | |
|
592 | |
|
593 | |
|
594 | |
=pod
|
595 | |
|
596 | |
=head1 Sorted Vector Set Operations
|
597 | |
|
598 | |
The following functions are provided for set operations on
|
599 | |
flat sorted PDLs with unique values. They may be more efficient to compute
|
600 | |
than the corresponding implementations via PDL::Primitive::setops().
|
601 | |
|
602 | |
=cut
|
603 | |
|
604 | |
|
605 | |
|
606 | |
|
607 | |
|
608 | |
=head2 v_union
|
609 | |
|
610 | |
=for sig
|
611 | |
|
612 | |
Signature: (a(NA); b(NB); [o]c(NC); int [o]nc())
|
613 | |
|
614 | |
|
615 | |
Union of two flat sorted unique-valued PDLs.
|
616 | |
Input PDLs $a() and $b() B<MUST> be sorted in lexicographic order and contain no duplicates.
|
617 | |
On return, $nc() holds the actual number of values in the union.
|
618 | |
|
619 | |
In scalar context, reshapes $c() to the actual number of elements in the union and returns it.
|
620 | |
|
621 | |
|
622 | |
|
623 | |
=for bad
|
624 | |
|
625 | |
v_union does not process bad values.
|
626 | |
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.
|
627 | |
|
628 | |
|
629 | |
=cut
|
630 | |
|
631 | |
|
632 | |
|
633 | |
|
634 | |
|
635 | |
# v_union($a, $b [, $c [, $nc]])
#
# Perl-level wrapper around PDL::_v_union_int.
#
# Arguments:
#   $a, $b : input PDLs, passed through unchanged.
#   $c     : optional output PDL; a null PDL is created when omitted.
#   $nc    : optional output count PDL; a null PDL is created when omitted.
#
# Returns:
#   list context   : ($c, $nc) exactly as filled in by the internal routine.
#   scalar context : $c restricted along its first dim to the first
#                    $nc->max entries.  When that count is zero, $c is
#                    reshaped to an empty first dim instead.
sub PDL::v_union {
    my ($a,$b,$c,$nc) = @_;
    $c  = PDL->null if (!defined($c));
    $nc = PDL->null if (!defined($nc));

    PDL::_v_union_int($a,$b,$c,$nc);
    return ($c,$nc) if (wantarray);

    # A zero count would produce the slice "0:-1", i.e. the whole dimension,
    # so the empty case is handled the same way as in v_intersect/v_setdiff.
    my $nc_max = $nc->max;
    return ($nc_max > 0
            ? $c->slice("0:".($nc_max-1))
            : $c->reshape(0, ($c->dims)[1..($c->ndims-1)]));
}

# Make the method callable (and exportable) as a function of this package.
*v_union = \&PDL::v_union;
|
646 | |
|
647 | |
|
648 | |
|
649 | |
|
650 | |
|
651 | |
=head2 v_intersect
|
652 | |
|
653 | |
=for sig
|
654 | |
|
655 | |
Signature: (a(NA); b(NB); [o]c(NC); int [o]nc())
|
656 | |
|
657 | |
|
658 | |
Intersection of two flat sorted unique-valued PDLs.
|
659 | |
Input PDLs $a() and $b() B<MUST> be sorted in lexicographic order and contain no duplicates.
|
660 | |
On return, $nc() holds the actual number of values in the intersection.
|
661 | |
|
662 | |
In scalar context, reshapes $c() to the actual number of elements in the intersection and returns it.
|
663 | |
|
664 | |
|
665 | |
|
666 | |
=for bad
|
667 | |
|
668 | |
v_intersect does not process bad values.
|
669 | |
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.
|
670 | |
|
671 | |
|
672 | |
=cut
|
673 | |
|
674 | |
|
675 | |
|
676 | |
|
677 | |
|
678 | |
# v_intersect($a, $b [, $c [, $nc]])
#
# Perl-level wrapper around PDL::_v_intersect_int.  Missing outputs $c and
# $nc are created as null PDLs before the call.
#
# List context returns ($c, $nc).  Scalar context returns $c cut down along
# its first dim to the first $nc->max entries, or reshaped to an empty first
# dim when that count is not positive.
sub PDL::v_intersect {
    my ($lhs, $rhs, $out, $count) = @_;
    $out   = PDL->null unless defined $out;
    $count = PDL->null unless defined $count;

    PDL::_v_intersect_int($lhs, $rhs, $out, $count);
    return ($out, $count) if wantarray;

    my $n_found = $count->max;
    if ($n_found > 0) {
        return $out->slice("0:" . ($n_found - 1));
    }

    # Nothing found: empty dim 0, and carry over any further dims.
    return $out->reshape(0, ($out->dims)[1 .. ($out->ndims - 1)]);
}

# Make the method callable (and exportable) as a function of this package.
*v_intersect = \&PDL::v_intersect;
|
692 | |
|
693 | |
|
694 | |
|
695 | |
|
696 | |
|
697 | |
=head2 v_setdiff
|
698 | |
|
699 | |
=for sig
|
700 | |
|
701 | |
Signature: (a(NA); b(NB); [o]c(NC); int [o]nc())
|
702 | |
|
703 | |
|
704 | |
Set-difference ($a() \ $b()) of two flat sorted unique-valued PDLs.
|
705 | |
Input PDLs $a() and $b() B<MUST> be sorted in lexicographic order and contain no duplicate values.
|
706 | |
On return, $nc() holds the actual number of values in the computed vector set.
|
707 | |
|
708 | |
In scalar context, reshapes $c() to the actual number of elements in the difference set and returns it.
|
709 | |
|
710 | |
|
711 | |
|
712 | |
=for bad
|
713 | |
|
714 | |
v_setdiff does not process bad values.
|
715 | |
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles.
|
716 | |
|
717 | |
|
718 | |
=cut
|
719 | |
|
720 | |
|
721 | |
|
722 | |
|
723 | |
|
724 | |
# v_setdiff($a, $b [, $c [, $nc]])
#
# Perl-level wrapper around PDL::_v_setdiff_int.  Missing outputs $c and
# $nc are created as null PDLs before the call.
#
# List context returns ($c, $nc).  Scalar context returns $c cut down along
# its first dim to the first $nc->max entries, or reshaped to an empty first
# dim when that count is not positive.
sub PDL::v_setdiff {
    my ($lhs, $rhs, $out, $count) = @_;
    $out   = PDL->null unless defined $out;
    $count = PDL->null unless defined $count;

    PDL::_v_setdiff_int($lhs, $rhs, $out, $count);
    return ($out, $count) if wantarray;

    my $n_left = $count->max;
    if ($n_left > 0) {
        return $out->slice("0:" . ($n_left - 1));
    }

    # Empty result: empty dim 0, and carry over any further dims.
    return $out->reshape(0, ($out->dims)[1 .. ($out->ndims - 1)]);
}

# Make the method callable (and exportable) as a function of this package.
*v_setdiff = \&PDL::v_setdiff;
|
738 | |
|
739 | |
|
740 | |
|
741 | |
|
742 | |
=pod
|
743 | |
|
744 | |
=head1 Miscellaneous Vector-Valued Operations
|
745 | |
|
746 | |
=cut
|
747 | |
|
748 | |
|
749 | |
|
750 | |
|
751 | |
|
752 | |
=head2 vv_vcos
|
753 | |
|
754 | |
=for sig
|
755 | |
|
756 | |
Signature: (a(M,N);b(M);float+ [o]vcos(N))
|
757 | |
|
758 | |
|
759 | |
Computes the vector cosine similarity of a dense vector $b() with respect to each row $a(*,i)
|
760 | |
of a dense PDL $a(). This is basically the same thing as:
|
761 | |
|
762 | |
($a * $b)->sumover / ($a->pow(2)->sumover->sqrt * $b->pow(2)->sumover->sqrt)
|
763 | |
|
764 | |
... but should be much faster to compute, and avoids allocating potentially large temporaries for
|
765 | |
the vector magnitudes. Output values in $vcos() are cosine similarities in the range [-1,1],
|
766 | |
except for zero-magnitude vectors which will result in NaN values in $vcos().
|
767 | |
|
768 | |
You can use PDL threading to batch-compute distances for multiple $b() vectors simultaneously:
|
769 | |
|
770 | |
$bx = random($M, $NB); ##-- get $NB random vectors of size $M
|
771 | |
$vcos = vv_vcos($a,$bx); ##-- $vcos(i,j) ~ sim($a(,i),$b(,j))
|
772 | |
|
773 | |
|
774 | |
|
775 | |
=for bad
|
776 | |
|
777 | |
vv_vcos() will set the bad status flag on the output piddle $vcos() if it is set on either of the input
|
778 | |
piddles $a() or $b(), but BAD values will otherwise be ignored for computing the cosine similarity.
|
779 | |
|
780 | |
|
781 | |
=cut
|
782 | |
|
783 | |
|
784 | |
|
785 | |
|
786 | |
|
787 | |
|
788 | |
# Alias PDL::vv_vcos into this package so it can be called as a plain
# function and exported via @EXPORT_OK.  The sub itself is not defined in this
# file's Perl code -- presumably it is installed by the bootstrap call
# (TODO confirm).
*vv_vcos = \&PDL::vv_vcos;
|
789 | |
|
790 | |
|
791 | |
|
792 | |
|
793 | |
##---------------------------------------------------------------------
|
794 | |
=pod
|
795 | |
|
796 | |
=head1 ACKNOWLEDGEMENTS
|
797 | |
|
798 | |
=over 4
|
799 | |
|
800 | |
=item *
|
801 | |
|
802 | |
Perl by Larry Wall
|
803 | |
|
804 | |
=item *
|
805 | |
|
806 | |
PDL by Karl Glazebrook, Tuomas J. Lukka, Christian Soeller, and others.
|
807 | |
|
808 | |
=item *
|
809 | |
|
810 | |
Code for rlevec() and rldvec() derived from the PDL builtin functions
|
811 | |
rle() and rld() in $PDL_SRC_ROOT/Basic/Slices/slices.pd
|
812 | |
|
813 | |
=back
|
814 | |
|
815 | |
=cut
|
816 | |
|
817 | |
##----------------------------------------------------------------------
|
818 | |
=pod
|
819 | |
|
820 | |
=head1 KNOWN BUGS
|
821 | |
|
822 | |
Probably many.
|
823 | |
|
824 | |
=cut
|
825 | |
|
826 | |
|
827 | |
##---------------------------------------------------------------------
|
828 | |
=pod
|
829 | |
|
830 | |
=head1 AUTHOR
|
831 | |
|
832 | |
Bryan Jurish E<lt>moocow@cpan.orgE<gt>
|
833 | |
|
834 | |
|
835 | |
=head1 COPYRIGHT
|
836 | |
|
837 | |
=over 4
|
838 | |
|
839 | |
=item *
|
840 | |
|
841 | |
Code for qsortvec() copyright (C) Tuomas J. Lukka 1997.
|
842 | |
Contributions by Christian Soeller (c.soeller@auckland.ac.nz)
|
843 | |
and Karl Glazebrook (kgb@aaoepp.aao.gov.au). All rights
|
844 | |
reserved. There is no warranty. You are allowed to redistribute this
|
845 | |
software / documentation under certain conditions. For details, see
|
846 | |
the file COPYING in the PDL distribution. If this file is separated
|
847 | |
from the PDL distribution, the copyright notice should be included in
|
848 | |
the file.
|
849 | |
|
850 | |
|
851 | |
=item *
|
852 | |
|
853 | |
All other parts copyright (c) 2007-2022, Bryan Jurish. All rights reserved.
|
854 | |
|
855 | |
This package is free software, and entirely without warranty.
|
856 | |
You may redistribute it and/or modify it under the same terms
|
857 | |
as Perl itself.
|
858 | |
|
859 | |
=back
|
860 | |
|
861 | |
|
862 | |
=head1 SEE ALSO
|
863 | |
|
864 | |
perl(1), PDL(3perl)
|
865 | |
|
866 | |
=cut
|
867 | |
|
868 | |
|
869 | |
|
870 | |
;
|
871 | |
|
872 | |
|
873 | |
|
874 | |
# Exit with OK status
|
875 | |
|
876 | |
1;
|
877 | |
|
878 | |
⏎
|