#! /usr/local/bin/perl
#####
#
# Example web robot to make an automated query on google.com
# Before using this see: http://www.google.com/terms_of_service.html
#
# See http://www.google.com/apis/ for an authorized way to automate
# queries on google. The Net::Google or WWW::Search::Google modules
# available from CPAN provide an interface.
#
# Other options include: WWW::Search::Scraper::Google, also on CPAN.
#
# What the script does: sends one GET request for a fixed search query
# while presenting a browser-like User-Agent string, parses the returned
# HTML, and prints (one per line) every extracted link that starts with
# "http" and does not contain "google.com" or "search?q=cache".
# Exits 0 on success; dies with the HTTP status line if the request fails.
#
#####

use strict;
use warnings;

#
# Required modules
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use HTML::Parse;

#
# Constants
# The search term is already in query-string form ('+' stands for a space).
my $searchterm = "porsche+911";
my $root       = "http://www.google.com/search?q=$searchterm";

#
# Choose what kind of browser we look like to the web site
my $browserid = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)';
#$browserid = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)';
#$browserid = 'Mozilla/4.0 (compatible; MSIE 5.0; Win32)';
#$browserid = 'Mozilla/4.78 [en] (Windows NT 5.0; U)';
#$browserid = 'Mozilla/4.5 [en] (X11; U; SunOS 5.6 sun4u)';

# Matches links that start with "http" and contain neither "google.com"
# nor "search?q=cache" anywhere.  The leading lookahead walks the whole
# string and refuses to step over a position where a forbidden substring
# begins; the trailing "^http" then requires the scheme prefix.
my $external_link_re = qr{
    (?= ^ (?: (?! google\.com | search\?q=cache ) . )* $ )
    ^http
}x;

# Create a new UserAgent object
my $ua = LWP::UserAgent->new;

# Set the product token to fake a regular browser
# If you don't do this LWP identifies itself and google blocks the request
$ua->agent($browserid);

# Create a request object
my $request = HTTP::Request->new('GET', $root);

# Get the page
my $response = $ua->request($request);

# Stop here on a failed request: otherwise an error page would be parsed
# and the script would exit 0 without printing anything useful.
if (!$response->is_success) {
    die "Request for $root failed: " . $response->status_line . "\n";
}

# Parse the HTML in the returned page
my $parsed = HTML::Parse::parse_html($response->content);

# Loop through links in the parsed HTML.  Each entry returned by
# extract_links() is an array reference whose first element is the URL.
for my $link_info (@{ $parsed->extract_links() }) {
    my $link = $link_info->[0];

    # Skip links that point back to google,
    # print the rest
    if ($link =~ $external_link_re) {
        print "$link \n";
    }
}

exit 0;