Sérmac
|
UPDATE: Επειδή άλλαξαν λίγο το fsserron.gr, η έκδοση 0.1 είχε σταματήσει να λειτουργεί.
Έκδοση 0.2
check.php
<?php
/*
    FSSerron Parser

    @version: 0.2
    @filename: check.php
    @created on: March 26, 2011
    @modified on: May 30, 2011
    @author: pgian
    @contact: pgian (at) sermac (dot) gr
    @copyright: 2011, All rights reserved
    @license: GNU General Public License (GPL)

    - FSSerron Parser is free software: you can redistribute it and/or modify it
      under the terms of the GNU General Public License as published by the Free
      Software Foundation, either version 3 of the License, or (at your option)
      any later version.
    - FSSerron Parser is distributed in the hope that it will be useful, but
      WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
      or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
      for more details: http://www.gnu.org/copyleft/gpl.html
*/

// Include function files for this application
require_once('./functions.php');

/**
 * Builds a DOMDocument from an HTML string, tolerating malformed markup.
 *
 * @param string|false $html Markup as returned by curlCall(); false or blank after a failed fetch.
 * @return DOMDocument|null The parsed document, or null when there is nothing to parse.
 */
function loadHtmlDocument($html)
{
    // loadHTML() rejects an empty source, so bail out before calling it.
    if (!is_string($html) || trim($html) === '') {
        return null;
    }

    // Collect parser complaints internally instead of printing them, then
    // restore whatever error mode was active before.
    $previous = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    return $loaded ? $dom : null;
}

/**
 * Fetches a pharmacy's detail page and extracts its address and map image.
 *
 * @param string $url      Absolute URL of the pharmacy page.
 * @param string $base_url Site root that the map image's src is appended to.
 * @return array Keys 'address' and 'map'; each is null when not found.
 */
function parsePharmacyDetails($url, $base_url)
{
    $details = array('address' => null, 'map' => null);

    $dom = loadHtmlDocument(curlCall($url));
    if ($dom === null) {
        return $details;
    }

    // Address: the text of the first two <td valign="top"> cells that carry
    // neither a width nor a rowspan attribute.
    $parts = array();
    foreach ($dom->getElementsByTagName('td') as $td_node) {
        if ($td_node->getAttribute('valign') === 'top'
            && $td_node->getAttribute('width') === ''
            && $td_node->getAttribute('rowspan') === '') {
            $parts[] = trim($td_node->nodeValue);
            if (count($parts) === 2) {
                break;
            }
        }
    }
    if (count($parts) > 0) {
        // implode() avoids a dangling ", " when only one cell matched.
        $details['address'] = implode(', ', $parts);
    }

    // Map image: the last <img align="middle"> on the page wins.
    foreach ($dom->getElementsByTagName('img') as $img_node) {
        if ($img_node->getAttribute('align') === 'middle') {
            $details['map'] = $base_url . $img_node->getAttribute('src');
        }
    }

    return $details;
}
?>
<!DOCTYPE html>
<html>
<head>
<title>FSSerron Parser</title>
<meta http-equiv="content-type" content="text/html;charset=UTF-8" />
</head>
<body>
<?php
$base_url = 'http://www.fsserron.gr';

// Call fsserron and get data
$dom = loadHtmlDocument(curlCall($base_url . '/'));

// Parse data into array
$array_parsed = array();

if ($dom !== null) {
    foreach ($dom->getElementsByTagName('table') as $table_node) {
        if ($table_node->getAttribute('class') !== 'efimereuon') {
            continue;
        }

        foreach ($table_node->childNodes as $row) {
            if ($row->nodeName !== 'tr') {
                continue;
            }

            foreach ($row->childNodes as $col) {
                if ($col->nodeName !== 'td' || $col->getAttribute('class') !== 'current') {
                    continue;
                }

                $data = $col->childNodes;
                foreach ($data as $key => $d) {
                    // Each pharmacy is introduced by a non-empty link.
                    if ($d->nodeName !== 'a') {
                        continue;
                    }
                    $name = trim($d->nodeValue);
                    if ($name === '') {
                        continue;
                    }

                    // Parse pharmacy url
                    $link = $d->getAttribute('href');
                    $href = $base_url . $link;

                    // Parse on-duty hours: the text node three siblings after
                    // the link. item() returns null past the end of the list.
                    $hours = null;
                    $hours_node = $data->item($key + 3);
                    if ($hours_node !== null && $hours_node->nodeName === '#text') {
                        $hours = trim($hours_node->nodeValue);
                    }

                    // Only follow the link when the anchor actually had a target.
                    $details = array('address' => null, 'map' => null);
                    if ($link !== '') {
                        $details = parsePharmacyDetails($href, $base_url);
                    }

                    $array_parsed[] = array(
                        'name'    => $name,
                        'url'     => $href,
                        'hours'   => $hours,
                        'address' => $details['address'],
                        'map'     => $details['map'],
                    );
                }
            }
        }
    }

    // Generate html. Skipped when the listing page could not be fetched, so
    // the previously generated file is not overwritten with an empty list.
    writeDataDOM('./pharmacies.html', $array_parsed);
}
?>
</body>
</html>
functions.php
<?php
/**
 * Connects to a server with cURL and returns the response body.
 *
 * Redirects are followed, the request times out after 10 seconds and a
 * Googlebot user agent string is sent.
 *
 * @param string      $url    Address to request.
 * @param string|null $cookie Path of a cookie file used both to read and to
 *                            store cookies; omit to disable cookies.
 * @param string|null $cert   Path of a CA bundle; when given, the peer
 *                            certificate and host name are verified against it.
 * @return string|false Response body, or false on failure (the cURL error is
 *                      echoed as an HTML line).
 */
function curlCall($url, $cookie=NULL, $cert=NULL)
{
    $curl = curl_init($url);
    if ($curl === false) {
        echo 'Curl error: could not initialise a session<br />';
        return false;
    }

    curl_setopt($curl, CURLOPT_USERAGENT, 'Googlebot/2.1 (http://www.googlebot.com/bot.html)');
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_AUTOREFERER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_TIMEOUT, 10);

    if (is_string($cert) && $cert !== '') {
        // No CURLOPT_SSLVERSION here: pinning SSLv3 is insecure and rejected
        // by current TLS stacks, so the protocol is left to negotiation.
        curl_setopt($curl, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
        curl_setopt($curl, CURLOPT_CAINFO, $cert);
    }

    if (is_string($cookie) && $cookie !== '') {
        curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie);
        curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie);
    }

    $html = curl_exec($curl);
    if ($html === false) {
        // The message ends up inside an HTML page, so escape it.
        echo 'Curl error: ' . htmlspecialchars(curl_error($curl), ENT_QUOTES, 'UTF-8') . '<br />';
    }
    curl_close($curl);

    return $html;
}
/**
 * Writes parsed pharmacy data into an html file as a string-built fragment.
 *
 * Every value is HTML-escaped before being inserted, because the data comes
 * from a scraped third-party page.
 *
 * @param string $filename Destination path; the file is created or truncated.
 * @param array  $data     List of arrays with the keys 'name', 'url', 'hours',
 *                         'address' and 'map'. Missing or null values are
 *                         written as empty strings.
 * @return void
 */
function writeData($filename, $data)
{
    $html = '<div id="pharmacies">';

    foreach ($data as $d) {
        // Normalise and escape the five fields once per entry.
        $safe = array();
        foreach (array('name', 'url', 'hours', 'address', 'map') as $field) {
            $raw = isset($d[$field]) ? (string) $d[$field] : '';
            $safe[$field] = htmlspecialchars($raw, ENT_QUOTES, 'UTF-8');
        }

        $html .= '<div class="pharmacy">';
        $html .= '<div class="pharmacy-name"><a href="'.$safe['url'].'" target="_blank">'.$safe['name'].'</a></div>';
        $html .= '<div class="pharmacy-address"><a href="'.$safe['map'].'" target="_blank">'.$safe['address'].'</a></div>';
        $html .= '<div class="pharmacy-hours">'.$safe['hours'].'</div>';
        $html .= '</div>';
    }

    $html .= '</div>';

    // file_put_contents() opens, truncates, writes and closes in one call and
    // just returns false (with a warning) when the path is not writable.
    file_put_contents($filename, $html);
}
/**
 * Writes parsed pharmacy data into an html file, building the markup with DOM.
 *
 * The output is a <div id="pharmacies"> holding one <div class="pharmacy">
 * per entry, each with a name link, an address link and an hours line.
 *
 * @param string $filename Destination path of the generated HTML file.
 * @param array  $data     List of arrays with the keys 'name', 'url', 'hours',
 *                         'address' and 'map'. Missing or null values are
 *                         written as empty strings.
 * @return void
 */
function writeDataDOM($filename, $data)
{
    $doc = new DOMDocument('1.0');
    $doc->formatOutput = true;

    $div_pharmacies = $doc->createElement('div');
    $div_pharmacies->setAttribute('id', 'pharmacies');
    $doc->appendChild($div_pharmacies);

    foreach ($data as $d) {
        // The DOM methods below expect strings; the parser leaves fields it
        // could not find as null, so normalise them first.
        $entry = array();
        foreach (array('name', 'url', 'hours', 'address', 'map') as $field) {
            $entry[$field] = isset($d[$field]) ? (string) $d[$field] : '';
        }

        $div_pharmacy = $doc->createElement('div');
        $div_pharmacy->setAttribute('class', 'pharmacy');
        $div_pharmacies->appendChild($div_pharmacy);

        // Name, linked to the pharmacy page
        $div_name = $doc->createElement('div');
        $div_name->setAttribute('class', 'pharmacy-name');
        $div_pharmacy->appendChild($div_name);

        $div_name_a = $doc->createElement('a');
        $div_name_a->setAttribute('href', $entry['url']);
        $div_name_a->setAttribute('target', '_blank');
        $div_name_a->appendChild($doc->createTextNode($entry['name']));
        $div_name->appendChild($div_name_a);

        // Address, linked to the map image
        $div_address = $doc->createElement('div');
        $div_address->setAttribute('class', 'pharmacy-address');
        $div_pharmacy->appendChild($div_address);

        $div_address_a = $doc->createElement('a');
        $div_address_a->setAttribute('href', $entry['map']);
        $div_address_a->setAttribute('target', '_blank');
        $div_address_a->appendChild($doc->createTextNode($entry['address']));
        $div_address->appendChild($div_address_a);

        // On-duty hours
        $div_hours = $doc->createElement('div');
        $div_hours->setAttribute('class', 'pharmacy-hours');
        $div_hours->appendChild($doc->createTextNode($entry['hours']));
        $div_pharmacy->appendChild($div_hours);
    }

    // Create HTML file
    if ($doc->saveHTMLFile($filename) === false) {
        trigger_error('writeDataDOM: could not write ' . $filename, E_USER_WARNING);
    }
}
?>
|