Contact Extractor | Back to Code Lab
This script is designed to extract the contact information for all the businesses listed on the www.collingwood.ca website. The address of the resources to be extracted is: http://www.collingwood.ca/business-detail.cfm?itemid=5168&table=business
The itemid is the only part of the address that changes from one request to the next. Please note: to keep this script simple, we have removed most of the XHTML and kept only the most important parts; please follow proper web standards in your own code.
<?php
/**
 * Contact extractor for the business listings on www.collingwood.ca.
 *
 * Requests business-detail.cfm once for every itemid in [$counter, $maxCount)
 * and prints one table row (name, website, e-mail, phone, source address) for
 * each page that contains a non-empty <h4> business name.
 */

// A request address is built as $address1 . <itemid> . $address2.
$address1 = "http://www.collingwood.ca/business-detail.cfm?itemid=";
$address2 = "&table=business";

// First itemid to request and the exclusive upper bound.
$counter  = 0;
$maxCount = 8000;

/**
 * Return the text between the first $openElement and the next $closeElement.
 *
 * @param string $haystack     Text to search.
 * @param string $openElement  Marker that precedes the wanted text.
 * @param string $closeElement Marker that follows the wanted text.
 * @return string|null The enclosed text, or null when $haystack is not a
 *                     string or either marker cannot be found.
 */
function extractData( $haystack, $openElement, $closeElement )
{
    // A failed download hands us false instead of a string.
    if( !is_string( $haystack ) )
    {
        return null;
    }

    // strpos() returns 0 for a match at the very start of the string, so the
    // result must be compared against false with === rather than by truthiness.
    $position1 = strpos( $haystack, $openElement );
    if( $position1 === false )
    {
        return null;
    }

    // Exclude the open element by moving to the first character after it.
    $position1 += strlen( $openElement );

    // Without a closing element there is no well-defined region to return;
    // letting false reach substr() would produce a negative length.
    $position2 = strpos( $haystack, $closeElement, $position1 );
    if( $position2 === false )
    {
        return null;
    }

    return substr( $haystack, $position1, $position2 - $position1 );
}

/**
 * Convert a scraped fragment into text that is safe to print inside HTML
 * content or a double-quoted attribute value.
 *
 * @param string|null|false $fragment Raw text cut out of the remote page.
 * @return string Trimmed text with markup removed and special characters escaped.
 */
function escapeField( $fragment )
{
    // Remove any tags that were caught between the delimiters.
    $text = trim( strip_tags( (string) $fragment ) );

    // The encoding of the remote page is not known here. A single-byte charset
    // makes htmlspecialchars() leave every non-ASCII byte untouched and escape
    // only & < > " '. The final false (double_encode) keeps entities that are
    // already present in the scraped text, such as &amp;, from being escaped twice.
    return htmlspecialchars( $text, ENT_QUOTES, 'ISO-8859-1', false );
}
?>
<html>
<body>
<table border="1">
<tr><th>Business</th><th>Website</th><th>Email</th><th>Phone</th><th>Archive Address</th></tr>
<?php
for( ; $counter < $maxCount; $counter ++ )
{
    // Set the address with the proper itemid (counter).
    $address = $address1 . $counter . $address2;

    // Get the contents of the requested page; skip this itemid if the request failed.
    $haystack = file_get_contents( $address );
    if( $haystack === false )
    {
        continue;
    }

    // NOTE: Each request returns script garbage and a basic html structure
    // even though in some cases an entry may not exist.

    // The business name lives in the h4 element; no name means no entry.
    $name = extractData( $haystack, "<h4>", "</h4>" );
    if( $name === null || $name === "" )
    {
        continue;
    }

    // Find the phone number (empty string when the page has none).
    $phone = (string) extractData( $haystack, "Phone:", "\n" );

    // Some entries prefix the number with "Res"; keep only what follows it.
    $temp = strpos( $phone, "Res" );
    if( $temp !== false )
    {
        $phone = substr( $phone, $temp + 3 );
    }

    // Find the email address and the website address.
    $email   = extractData( $haystack, "mailto:", "\">" );
    $website = extractData( $haystack, "<a href=\"http://", "\"" );

    // Everything below comes from a remote page, so escape it before printing.
    $name    = escapeField( $name );
    $phone   = escapeField( $phone );
    $email   = escapeField( $email );
    $website = escapeField( $website );
    $source  = escapeField( $address );

    echo " <tr>";
    echo "<td>$name</td>";

    // Only print a link when something was actually found.
    if( $website !== "" )
    {
        echo "<td><a href=\"http://$website\" target=\"_blank\">$website</a></td>";
    }
    else
    {
        echo "<td></td>";
    }

    if( $email !== "" )
    {
        echo "<td><a href=\"mailto:$email\">$email</a></td>";
    }
    else
    {
        echo "<td></td>";
    }

    echo "<td>$phone</td>";
    echo "<td>$source</td>";
    echo "</tr>\n";
}
?>
</table>
</body>
</html>