#!/usr/bin/perl
#######################
# findlinks4
#
# Print every link found in an HTML file.
#
# If one really wanted to extract parts
# of an html document, though, there's no
# reason to re-invent the wheel.
#
# See, for example, HTML::LinkExtor, which is
# part of the HTML::Parser package.
# Documentation and examples may be found on the CPAN,
# for example at
#   http://theoryx5.uwinnipeg.ca/CPAN/data/HTML-Parser/HTML/LinkExtor.html
#   https://metacpan.org/pod/HTML::LinkExtor
#
# Usage:  findlinks4 file.html
#
########################
use strict;
use warnings;

use Data::Dumper;       # Core module used to print the result structure.
use HTML::LinkExtor;    # Import a package.

# Complain to the user if not called properly.
# $0 is the name the script was invoked as, so the usage line always
# matches the actual program name.
unless (@ARGV == 1) {
    die " Wrong number of arguments. Usage: '$0 file.html' \n";
}

# Tell the nice user what we're up to.
my $filename = $ARGV[0];
print "Analyzing '$filename' for links...\n";

# -- Now let the professionals do the work --

# Extract all the links.
my $parser = HTML::LinkExtor->new;    # Create an instance of an object.

# Call one of that object's methods.  parse_file() returns undef and sets
# $! when the file cannot be opened, so report that instead of silently
# printing an empty result.
$parser->parse_file($filename)
    or die "Cannot read '$filename': $!\n";

my @results = $parser->links;         # Call another method.

# Print the data structure using the Dumper.
# Each element of @results is passed as a separate argument, so each link
# is printed as its own $VAR1, $VAR2, ... entry.
print Dumper(@results);

# or
#
## Print the data structure @results = ( $link1, $link2, $link3, ...)
## where $link = [ $tag, $attr1 => $uri1, $attr2 => $uri2, ... ]
##
#foreach my $link (@results) {
#    my ($tag, %stuff) = @$link;
#    print " tag: '$tag' ";
#    foreach my $key (keys %stuff) {
#        print ", $key => " . $stuff{$key}
#    }
#    print "\n";
#}