#!/usr/bin/perl
####################
# findlinks
# Homework exercise to find and print links in an html file.
# Usage: ./findlinks file.html
#
# Here I have chosen to define a "link" as any string
# of url-ish characters (as defined below)
# that ends in .html or .htm , regardless of whether
# it appears inside a tag or not.
#
# Jim Mahoney, Oct 2001
######################
use strict;
use warnings;

# find_links($text)
#   Returns the list of every link-like substring found in $text, in the
#   order the substrings appear.  Returns an empty list when $text is
#   undefined.  Call it in list context: in scalar context a /g match
#   reports success or failure rather than the matched strings.
#
# The regular expression (stuff inside the m{} ) matches
#   \b        word boundary
#   [\w./:]   any single word character, period, slash, or colon
#   +         lots of them in a row (at least 1)
#   \.        a period
#   htm       the letters "htm"
#   l?        optionally, the letter l
# The ig at the end means "ignore case" and "global match".
# In list context, a "global match" with no capture groups returns the
# full text of every match, so the special variable $& (which slows down
# every regex in the program on older perls) is not needed.
sub find_links {
    my ($text) = @_;

    # Nothing was read (for example, the named file could not be opened).
    return () unless defined $text;

    return $text =~ m{\b[\w./:]+\.html?}ig;
}

# main()
#   Reads all of the input named on the command line (or standard input
#   when no file is given) and prints each link on its own line.
sub main {
    # Undefining the input record separator means we can read in the
    # entire input all at once; "local" inside the do-block restores the
    # normal line-at-a-time behavior as soon as the read is finished.
    my $html = do { local $/; <> };

    print "$_\n" for find_links($html);
    return;
}

# Run as a program when executed directly; do nothing extra when this
# file is loaded from other code with require/do.
main() unless caller;