Matching algorithms (with attachment)

Mon Jun 23 06:39:55 BST 2008

On Mon, Jun 23, 2008 at 03:20:02PM +1000, Toby Corkindale wrote:
[snip]
> I'll attach an example program that demonstrates what I'm doing, and uses the
> slow loops-inside-loops method :(

The attachment didn't seem to make it onto the list, so here it is again,
inline:

#!/usr/bin/perl
use strict;
use warnings;

my @source = ( 1, 5, 7 );
my @sets = (
    [ 1, 3, 6, 7 ],
    [ 2, 4, 7, 9 ],
    [ 7, 3, 5, 2 ],
    [ 1, 5, 7, 9 ],
    [ 99, 99, 0, 0 ]
);

print "Looking for similarity with: " . join(',', @source) . "\n";
set_search_set(\@source);
foreach my $set (@sets) {
    my $result = compare_precached($set);
    print "Similarity: $result for " . join(',', @$set) . "\n";
}

# This is O(n^2)
sub compare_slow {
    my ($first, $second) = @_;
    my $count = 0;
    foreach my $i (@$first) {
        $count += grep { $_ eq $i } @$second;
    }
    return $count;
}

# This is O(2n) I think.
sub compare_hashed {
    my ($first, $second) = @_;
    my %search = map { $_ => 1 } @$first;
    return grep { exists $search{$_} } @$second;
}

# This would be marginally faster O(n)
{
    my %searchset;
    sub set_search_set {
        %searchset = map { $_ => 1 } @{$_[0]};
    }
    sub compare_precached {
        return grep { exists $searchset{$_} } @{$_[0]};
    }
}