#!/usr/bin/perl -w
#######################
# findlinksfromurl
#
# As long as we're using CPAN modules, we
# can go fetch the web page, too.
#
# In addition to HTML::LinkExtor (see findlinks4),
# this example shows off the ever popular LWP::Simple.
#
# See also
#   libwww-perl (http://theoryx5.uwinnipeg.ca/CPAN/data/libwww-perl/)
#   LWP::Simple
#     (http://bob/~msie/2002/ipl/perl/docs/various_modules/LWP-Simple.html)
#   LWP::UserAgent
#   URI.pm , and HTML::Parser.
#
# Remember that to install any of these, the procedure is
#   shell> su
#   shell# perl -MCPAN -e shell
#   cpan> install LWP::Simple
#   ....
#   cpan> exit
# Or manually fetch the .tar.gz file, unpack it, and follow the instructions,
# typically "perl Makefile.PL; make; make install ".
#
# To see the documentation, either head to the CPAN online or
#
#   # read the documentation
#   shell> perldoc LWP::Simple
#
#   # create an html documentation file
#   shell> perldoc -u LWP::Simple | pod2html > LWP-Simple.html
#
# Usage:
#   findlinksfromurl url
#
# Fetches the page at 'url', extracts every link-carrying tag from it,
# and prints one line per tag listing its link attributes.
# Dies with a message if the argument count is wrong or the fetch fails.
#
########################
use strict;
use warnings;    # lexical warnings, in case the script is run without the -w shebang

# Exactly one argument (the URL) is required.
unless (@ARGV == 1) {
    die " Wrong number of arguments. Usage: 'findlinksfromurl url' \n";
}
my $url = $ARGV[0];

print "Fetching the web page from '$url' ... \n";
use LWP::Simple;
my $html = get($url);    # What could be simpler than that?

# get() returns undef on failure. Test definedness rather than truth so that
# a page whose body is empty (or is the string "0") is not reported as a
# failed fetch.
unless (defined $html) {
    die " Couldn't fetch '$url' with LWP::Simple. \n";
}

print "Extracting links ... \n";

# Extract all the links.
# No base URL is handed to the parser, so links are reported exactly as they
# are written in the page (relative links stay relative).
use HTML::LinkExtor;                      # Import a package.
my $parser = HTML::LinkExtor->new;        # Create an instance of an object.
$parser->parse($html);                    # Call one of that object's methods.
$parser->eof;                             # Signal end of document to the parser.
my @results = $parser->links;             # Call another method.

## Print the data structure using the Dumper.
# use Data::Dumper;
# print Dumper(@results);

# Print the data structure @results = ( $link1, $link2, $link3, ...)
# where $link = [ $tag, $attr1 => $uri1, $attr2 => $uri2, ... ]
foreach my $link (@results) {
    my ($tag, %stuff) = @$link;
    print " tag: '$tag' ";

    # Sort the attribute names: hash order is randomized in Perl, so without
    # the sort a tag with several link attributes would print them in a
    # different order from run to run.
    foreach my $key (sort keys %stuff) {
        print ", $key => " . $stuff{$key};
    }
    print "\n";
}