use strict; use warnings; package Hash::MostUtils; use base qw(Exporter); use Carp qw(confess); use Hash::MostUtils::leach qw(n_each leach); our $VERSION = 1.05; our @EXPORT_OK = qw( lvalues lkeys leach hash_slice_of hash_slice_by hashmap hashgrep hashapply n_each n_map n_grep n_apply reindex rekey revalue ); # decrementing $| flips it between 0 and 1 sub lkeys { local $|; return grep { $|-- == 0 } @_ } sub lvalues { local $|; return grep { $|-- == 1 } @_ } # I would put leach() here, but it was imported above *hashmap = sub(&@) { unshift @_, 2; goto &n_map }; *hashgrep = sub(&@) { unshift @_, 2; goto &n_grep }; *hashapply = sub (&@) { unshift @_, 2; goto &n_apply }; # I would put n_each() here, but it was imported above sub n_map ($&@) { # Usually I don't mutate @_. Here I deliberately modify @_ for the upcoming non-obvious goto-&NAME. my $n = shift; my $collector = sub { return $_[0]->() }; unshift @_, $collector; # Using a "safe goto" allows n_map() to remove itself from the callstack, which allows _n_collect() # to see the correct caller. # # 'perldoc -f goto' for why this is a safe goto. goto &{_n_collect($n)}; } sub n_grep ($&@) { my $n = shift; # the comments in n_map() apply here as well. my $collector = sub { my ($code, $vals, $aliases) = @_; return $code->() ? @$vals : (); }; unshift @_, $collector; goto &{_n_collect($n)}; } sub n_apply { my $n = shift; my $collector = sub { my ($code, $vals, $aliases) = @_; $code->(); return map { $$_ } @$aliases; }; unshift @_, $collector; goto &{_n_collect($n)}; } sub _n_collect($) { my ($n) = @_; return sub(&@) { my $collector = shift; my $code = shift; if (@_ % $n != 0) { confess("your input is insane: can't evenly slice " . @_ . " elements into $n-sized chunks\n"); } # these'll reserve some namespace back in the callpackage my @n = ('a' .. 'z'); # stash old values back in callpackage *and* in main. If called from main::, this comes down to: # local ${'main::a'}, ${'main::b'}, ${'main::c'} # when $n is 3. my $caller = caller; no strict 'refs'; foreach ((@n[ 0 .. $n-1 ])) { local ${"$caller\::$_"}; local ${"::$_"}; } my @out; while (my @chunk = splice @_, 0, $n) { # build up each set... my @aliases; foreach (0 .. $#chunk) { # ...assign values from @_ back to localized variables in $caller *and* in 'main::'. # Aliasing in main:: allows you to refer to variables $c and onwards as $::c. # Aliasing in $caller allows you to refer to variables $c and onwards as $whatever::package::c. ${"::$n[$_]"} = ${"$caller\::$n[$_]"} = $chunk[$_]; # Keep a reference to $::a (etc.) and pass them in to the $collector; this allows $code to mutate # $::a (etc) and signal the changed values back to $collector. push @aliases, \${"::$n[$_]"}; } push @out, $collector->($code, \@chunk, \@aliases); # ...and apply $code. } return @out; }; } sub hash_slice_of { my ($ref, @keys) = @_; return map { ($_ => $ref->{$_}) } @keys; } sub hash_slice_by { my ($obj, @methods) = @_; return map { ($_ => scalar($obj->$_)) } @methods; } sub rekey (&@) { my %map = shift()->(); return n_map 2, sub { $map{$a} || $a => $b }, @_; } sub reindex (&@) { my %map = shift()->(); @_[values %map] = delete @_[keys %map]; return @_; } sub revalue (&@) { my %map = shift()->(); return n_map 2, sub { $a => $map{$b} || $b }, @_; } 1; __END__ =head1 NAME Hash::MostUtils - Yet another collection of tools for operating pairwise on lists. =head1 SYNOPSIS my @found_and_transformed = hashmap { uc($b) => 100 + $a } hashgrep { $a < 100 && $b =~ /[aeiou]/i } ( 1 => 'cwm', 2 => 'apple', 100 => 'cherimoya', ); my @keys = lkeys @found_and_transformed; my @vals = lvalues @found_and_transformed; foreach my $key (@keys) { my $value = shift @vals; print "$key => $val\n"; } while (my ($key, $val) = leach @found_and_transformed) { print "$key => $val\n"; } =head1 EXPORTS By default, none. On request, any of the following: =head1 FUNCTIONS TO MAKE ARRAYS ACT LIKE HASHES =head2 lkeys LIST Return the "keys" of LIST. Perl's C<keys()> keyword only operates on hashes; lkeys() offers an approximation of the same functionality for lists. my @evens = lkeys 1..10; my @keys = lkeys # give me back those keys (i.e. the letters) hashgrep { $b > 100 } # find key/value pairs where the value is > 100 map { $_ => int(rand(1000)) } 'a'..'z'; # turn 'a'..'z' into key/value pairs with random values The "keys" of a list are the even-positioned items. Note that in the case of an C<E<gt>empty slotE<lt>> in a sparse array, the key will be C<undef>. =head2 lvalues LIST Return the "values" of LIST. Perl's C<values()> keyword only operates on hashes; lvalues() offers an approximation of the same functionality for lists. my @odds = lkeys 1..10; my @values = lvalues # give me back those values (i.e. the letters) hashgrep { $a > 100 } # look for key/value pairs where the key is > 100 map { int(rand(1000)) => $_ } 'a'..'z'; # make 26 random keys from 1-1000, with fixed keys The "values" of a list are the odd-positioned items. Note that in the case of an C<E<gt>empty slotE<lt>> in a sparse array, the value will be C<undef>. =head2 leach [ ARRAY | HASH | ARRAYREF | HASHREF ] Iterate over an ARRAY, HASH, ARRAYREF, or HASHREF, returning successive "key/value" pairs. This behaves functionally identically to Perl's built-in C<each> keyword; however, it is useful for arrays and array- and hash-references. This function handles objects which are built around blessed array- and hash-references. my @array = (1..4); while (my ($k, $v) = leach @array) { print "$k => $v\n"; } print "$_\n" for @array; __END__ 1 => 2 3 => 4 1 2 3 4 Using C<leach> to gather key/value pairs from a collection is guaranteed to be non-destructive to that collection. One pattern that's useful for iterating arrays and arrary references in pairs is to use C<splice>, which has the possibly unintended side effect of destroying the subject collection: my @array = (1..4); while (my ($k, $v) = splice @array, 0, 2) { print "$k => $v\n"; } print "$_\n" for @array; __END__ 1 => 2 3 => 4 Note the distinction between saying that this function is leach ARRAY rather than leach LIST Perl does not allow this behavior: while (my ($k, $v) = leach 1..10) { # can't leach a list, only an array # do something with this key/value tuple } But don't worry, Perl also doesn't allow for this behavior: while (my ($k, $v) = splice 1..10, 0, 2) { # can't splice a list, only an array # do something with this key/value tuple } =head1 FUNCTIONS TO OPERATE ON LISTS, ARRAYS, AND HASHES AS TUPLES C<hashmap>, C<hashgrep>, and C<hashapply> all act like their corresponding C<map>, C<grep>, and C<List::Utils::apply> but for one notable exception: whereas C<map>, C<grep>, and C<apply> all eat items from the given list one-by-one and assign that current value to $_, C<hashmap>, C<hashgrep>, and C<hashapply> all eat items from the given list two-by-two, and assigns them to $a and $b. The names $a and $b were chosen because they're already in lexical scope in Perl due to C<sort>'s need for them. If you have a singular occurance of $a and $b within your program, you will probably see this warning from Perl: Name 'main::a' used only once: possible typo at ... Name 'main::b' used only once: possible typo at ... I've just gotten in the habit of adding: use strict; use warnings; no warnings 'once'; when I see that message. =head2 hashmap BLOCK LIST This acts similar to map BLOCK LIST with the exception that C<map> eats items off of LIST one at a time, assigning the current value to $_; whereas C<hashmap> eats items off of LIST two at a time, assigning the first value to $a and the second value to $b. # naive transformation of this hash into (101 => 'A', 102 => 'B') my %hash = ( a => 1, b => 2, ); my %transformed = hashmap { $b + 100 => uc($a) } %hash; Just like C<map>, your BLOCK will be called without any arguments. Like perl's keyword C<map>, this function maintains the order of LIST. C<hashmap> is simply a prototyped alias for n_map(2, CODEREF, LIST), so all of the documentation to C<n_map> applies here. =head2 hashgrep BLOCK LIST This acts similar to grep BLOCK LIST with the exception that C<grep> eats items off of LIST one at a time, assigning the current value to $_; whereas C<hashgrep> eats items off of LIST two at a time, assigning the first value to $a and the second value to $b. # lame object dumper my $object = Some::Class->new(...); my %dump = hashgrep { $a !~ /^_/ && ! ref($b) } # hide private fields and internal data structures %$object; Just like C<grep>, your BLOCK will be called without any arguments. Like perl's keyword C<grep>, this function maintains the order of LIST. C<hashgrep> is simply a prototyped alias for n_grep(2, CODEREF, LIST), so all of the documentation to C<n_grep> applies here. =head2 hashapply BLOCK LIST This is similar to C<List::MoreUtils::apply>: apply BLOCK LIST with the usual exception: C<apply> eats items off of LIST one at a time, assigning to $_; whereas C<hashapply> eats items off of LIST two at a time, assigning the first value to $a and the second value to $b. Normal C<apply> can be written as map: =over 4 my @words = qw(apple banana cherimoya); my @clean1 = map { tr/aeiou//d; $_ } @words; # @clean1 = @words = qw(ppl bnn chrmy); @words = qw(apple banana cherimoya); my @clean2 = apply { tr/aeiou//d } @words; # @clean2 = qw(ppl bnn chrmy); @words = qw(apple banana cherimoya); =back Note that C<apply> does not transform the original data, whereas C<map> does. Similarly, C<hashapply> does not transform the original data, whereas C<hashmap> might. Note that C<apply> does not need to explicitly return $_, whereas C<map> does. Similarly, C<hashapply> does not need to explicitly return a key/value tuple ($a, $b), whereas C<hashmap> does need to return something. Like C<apply>, C<hashapply> will not transform the original LIST. =head1 GENERIC N-ARY FORMS OF VARIOUS LIST-WISE FUNCTIONS Each of the pairwise functions mentioned so far - C<leach>, C<hashmap>, C<hashgrep>, C<hashapply> - are actually implemented in terms of more generic N-ary forms. This means that if you need to process a list in sets of N, where N is E<gt> 2, you may use the n_* forms of these functions. Variable naming becomes more interesting when moving beyond 2 items. Whereas $a and $b are always in lexical scope, once you go to N of 3, you need to agree on some variable naming convention. $a and $b work nicely for the first two elements of a list; so $c is the third, and $d the fourth, and so on. One limitation of this naming scheme is that you may not easily go beyond N of 26 - but if you find yourself needing that, you'll find the code simple to extend. In order to prevent 'strict refs' from complaining about $c..$z, you'll need to address those variables a bit differently: my @sets = n_map 6, sub { [$a, $b, $::c, $::d, $::e, $::f] }, n_apply 3, sub { $_ *= 3 for $a, $b, $::c }, n_grep 3, sub { $::c > 4 }, (1..9); # @sets = ([12, 15, 18, 21, 24, 27]); I personally find the transition between C<$b> and C<$::c> to be a bit jarring visually, so the one time I wrote a line like the above I chose to write it as C<$::a> and C<$::b>. my @sets = n_map 6, sub { [$::a, $::b, $::c, $::d, $::e, $::f] }, n_apply 3, sub { $_ *= 3 for $::a, $::b, $::c }, n_grep 3, sub { $::c > 4 }, (1..9); # @sets = ([12, 15, 18, 21, 24, 27]); =head2 n_each N, LIST Iterate over LIST, returning successive "key/values" sets. my @list = (1..9); while (my ($k, @v) = n_each 3, @list) { # do something with this $k and @v } There's nothing that says your N needs to remain constant: my @list = ( a => 1, b => 1, 2, c => 1, 2, 3, d => 1, 2, 3, 4, ); my $n = 2; my %triangle; while (my ($k, @v) = n_each $n++, @list) { $triangle{$k} = \@v; } __END__ %triangle = ( a => [1], b => [1, 2], c => [1, 2, 3], d => [1, 2, 3, 4], ); There's probably something clever that you can do with this that I just don't understand. Please drop me a line if you know what it is. =head2 n_map N, CODEREF, LIST C<map> CODEREF over LIST, operating in N-sized chunks. Within the context of CODEREF, values of LIST will be selected and aliased. LIST must be evenly divisible by N. See L<GENERIC N-ARY FORMS OF VARIOUS LIST-WISE FUNCTIONS> for a discussion of variable names. my @transformed = n_map( 3, sub { "$a, $b $::c!\n" }, qw(goodnight sweet prince goodbye cruel world), ); # @transformed = ("goodnight, sweet prince!\n", "goodbye, cruel world!"); If you are consistently n_map'ping by some N, then you might consider wrapping n_map so the call syntax looks more like one of Perl's functional keywords: sub tri_map (&@) { unshift @_, 3; goto &n_map } my @transformed = tri_map { "$::a, $::b $::c!\n" } qw(goodnight sweet prince goodbye cruel world); # @transformed = ("goodnight, sweet prince!\n", "goodbye, cruel world!"); =head2 n_grep N, CODEREF, LIST C<grep> for CODEREF over LIST, operating in N-sized chunks. Within the context of CODEREF, values of LIST will be selected and aliased. LIST must be evenly divisible by N. See L<GENERIC N-ARY FORMS OF VARIOUS LIST-WISE FUNCTIONS> for a discussion of variable names. my @found = n_grep( 3, sub { $a =~ /good/ && $::c =~ /prince/ }, qw(goodnight sweet prince goodbye cruel world), ); # @found = qw(goodnight sweet prince); Just as with C<n_map>, writing a small bit of gloss to make your N of n_grep work in a functional manner is simple, and makes your code more readable: sub tri_grep (&@) { unshift @_, 3; goto &n_grep } my @found = tri_grep { $::a =~ /good/ && $::c =~ /prince/ } qw(goodnight sweet prince goodbye cruel world); # @found = qw(goodnight sweet prince); =head2 n_apply N, CODEREF, LIST C<List::Utils::apply> CODEREF to LIST, operating in N-sized chunks. LIST must be evenly divisible by N. See L<GENERIC N-ARY FORMS OF VARIOUS LIST-WISE FUNCTIONS> for a discussion of variable names. my @uppercase = n_apply( 3, sub { uc $::c } qw(goodnight sweet prince goodbye cruel world), ); # @uppercase = qw(goodnight sweet PRINCE goodbye cruel WORLD); Just as with C<n_map>, writing a small bit of gloss to make your N of n_apply work in a functional manner is simple, and makes your code more readable: sub tri_apply (&@) { unshift @_, 3; goto &n_apply } my @uppercase = tri_apply { uc $::c } qw(goodnight sweet prince goodbye cruel world); # @uppercase = qw(goodnight sweet PRINCE goodbye cruel WORLD); =head1 GRAB BAG I like these functions, but they're decidedly different from everything up to this point. They are mostly used to turn an existing hash reference or object into a smaller representation of itself. =head2 hash_slice_of HASHREF, LIST Looks into HASHREF and extracts the key/value pairs of the keys named in LIST. If a key in LIST is not present in HASHREF, returns undefined. my %hash = (1..10); my %slice = hash_slice_of \%hash, qw(5, 7, 9, 11); __END__ %slice = ( 5 => 6, 7 => 8, 9 => 10, 11 => undef, ); If you only want to get back key/value pairs for keys in LIST that exist in HASHREF, just add a C<hashgrep>: my %hash = (1..10); my %slice = hashgrep { exists $hash{$a} } hash_slice_of \%hash, qw(5, 7, 9, 11); __END__ %slice = ( 5 => 6, 7 => 8, 9 => 10, ); =head2 hash_slice_by OBJECT, LIST Calls the methods named in LIST on OBJECT and returns a hash of the results. If a method in LIST can not be performed on OBJECT, you will get the standard "Can't call method ->... on object" error that Perl throws in this circumstance. my $object = ...; my %out = hash_slice_by $object, qw(foo bar baz); __END__ %out = ( foo => 'output of foo', bar => 'output of bar', baz => 'output of baz', ); Note that you may not use C<hash_slice_by> to pass arguments to the methods given in LIST. Note too that your methods are invoked in scalar context. =head2 rekey BLOCK HASH Rename the keys in HASH by the mapping table provided by BLOCK. HASH may be a real hash, or it may be an array that you are treating like a key/value store. my %hash = (crow => 'black', snow => 'white', libro => 'read all over'); my %spanish = rekey { crow => 'corvino', snow => 'nieve' } %hash; __END__ %spanish = ( corvino => 'black', nieve => 'white', libro => 'read all over', ); =head2 revalue BLOCK HASH Rename the values in HASH to the mapping table provided by BLOCK. HASH may be a real hash, or it may be an array that you are treating like a key/value store. my @start = (apple => 'red', apple => 'green'); my @translated = revalue { red => 'rojo', green => 'verde' } @start; __END__ @translated = ( apple => 'rojo', apple => 'verde', ); =head2 reindex BLOCK LIST Reorder the values in LIST by the mapping table provided by BLOCK. LIST may be either an array or a list. In general this function will not work on hashes. my @array = (1..5); my @reindexed = reindex { map { $_ => $_ + 1 } 0..$#array } @array; __END__ @reindexed = (undef, 1..5); =head1 ACKNOWLEDGEMENTS The names and behaviors of most of these functions were initially developed at AirWave Wireless, Inc. I've re-implemented them here. This software would be trapped on my hard drive were it not for Logan Bell's encouragement to release it. Separating the personal time I have put into this from the professional time afforded by my employer, Shutterstock, Inc. would be very difficult. Thankfully I haven't needed to; when I asked to share this, Dan McCormick simply said, "Go for it! Thanks for hacking." =head1 COPYRIGHT AND LICENSE (c) 2013 by Belden Lyman This library is free software: you may redistribute it and/or modify it under the same terms as Perl itself; either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available.