###############################################
# Some routines to analyze proper names.
# Essentially just a wrapper to a few CPAN modules.
#
# Usage:
#   use ExamineNames;
#   my $e = ExamineNames->new;
#   print $e->getGender("Jim");         # prints "male"
#   print $e->expandNickname("Jim");    # prints "James"
#   print $e->sameNames("Jim Mahoney", "JAMES H MAHONEY");   # prints 82
#
################################################
package ExamineNames;

use strict;
use warnings;

# `use` is executed at compile time no matter where it is written, so the
# wrapped modules are loaded here rather than inside the subs that call them.
# Each one is relied on for its default exports:
#   Text::GenderFromName   -> gender()
#   Lingua::EN::Nickname   -> nickroot()
#   Lingua::EN::MatchNames -> name_eq()
use Text::GenderFromName;
use Lingua::EN::Nickname;
use Lingua::EN::MatchNames;

# Set to a true value ($ExamineNames::DEBUG = 1) to print trace output
# from expandNickname() and sameNames() on STDOUT.
our $DEBUG = 0;

# ------------------------------
# Constructor.  The object carries no state; it only provides
# method-call access to the routines below.
sub new {
    my ($class) = @_;
    return bless {}, $class;
}

# ------------------------------
# Input:  first name (string)
# Output: 'male', 'female', or 'unknown' (string)
sub getGender {
    my ($self, $name) = @_;

    my $gender = gender($name);

    # gender() yields nothing usable when it cannot decide; check for
    # undef first so the string comparisons below never see it.
    return "unknown" unless defined $gender;
    return "male"    if $gender eq "m";
    return "female"  if $gender eq "f";
    return "unknown";
}

# ------------------------------
# Input:  nickname (string)
# Output: first guess at corresponding first name (string),
#         or undef when nickroot() offers no candidate
sub expandNickname {
    my ($self, $name) = @_;

    my @fullnames = nickroot($name);

    if ($DEBUG) {
        # Undefined values are shown as empty strings.
        my $shown_name  = defined $name ? $name : '';
        my @shown_roots = map { defined $_ ? $_ : '' } @fullnames;
        print " in expandNickname. name='$shown_name'; fullnames='",
            join(",", @shown_roots), "'\n";
    }

    return shift @fullnames;    # return first possible fullname.
}

# ------------------------------
# Input:  name1, name2 (strings)
#         both first and last names in each, e.g. "Jim Mahoney"
# Output: 0-100 (integer)
#         =0  => names do not match, or a name could not be split
#                into a first part and a last name
#         >0  => degree of confidence of match
sub sameNames {
    my ($self, $name1, $name2) = @_;

    my ($first1, $last1) = _splitName($name1);
    my ($first2, $last2) = _splitName($name2);

    # Only ask name_eq() when both names were split successfully; a name
    # without a "first last" shape can not be compared and counts as
    # "no match" instead of handing undefined values to name_eq().
    my $result;
    if (defined $last1 && defined $last2) {
        $result = name_eq($first1, $last1, $first2, $last2);
    }

    if ($DEBUG) {
        # Undefined values are shown as empty strings.
        my ($n1, $n2, $f1, $l1, $f2, $l2, $res) =
            map { defined $_ ? $_ : '' }
            ($name1, $name2, $first1, $last1, $first2, $last2, $result);
        print " in sameNames: name1='$n1', name2='$n2' \n";
        print " in sameNames: first1='$f1', last1='$l1' \n";
        print " in sameNames: first2='$f2', last2='$l2' \n";
        print " in sameNames: result = '$res' \n";
    }

    return 0 unless $result;
    return 0 + $result;    # The 0+ here ensures that value is a number.
}

# ------------------------------
# Private helper for sameNames().
# Input:  full name (string)
# Output: (first, last) where last is the final word (word characters,
#         hyphens, apostrophes) and first is everything before the single
#         space that precedes it; empty list when the name is undefined
#         or does not have that shape.
sub _splitName {
    my ($name) = @_;
    return unless defined $name;

    # In list context a successful match returns the two captures and a
    # failed match returns the empty list.
    return $name =~ m{^\s*(.*) ([\w\-\']+)\s*$};
}

# ------------------------------
# Perl modules should evaluate as true.
1;