Creating a Search Application - (Page 18 of 29 ) Now, we will glue all the bits together for you so that you may see the class file in its entirety. At the beginning of the class file, we have used the require statement to include the database class from the earlier section. <?php require('dbclass.php');
/**
 * Fetches a list of web pages, extracts keyword candidates from their text
 * and stores them in the `urls` / `keywords` tables through DB_Class.
 *
 * Typical use: construct with a newline-separated list of URLs, then call
 * process().
 */
class Harvest_Keywords
{
    /** Database wrapper (DB_Class); only its getone() and query() methods are used here. */
    public $_db;

    /** List of URLs to harvest, one entry per non-blank input line. */
    public $_urlarray;

    /** Words that are never stored, even though they pass the length check. */
    public $_stopwords = array('and', 'but', 'are', 'the');

    /** Words that are always stored verbatim, bypassing the length and character checks. */
    public $_allowwords = array('c++', 'ado', 'vb');

    /**
     * Opens the database connection and splits the URL list.
     *
     * @param string $urls URLs separated by line breaks (LF, CRLF or CR).
     */
    public function __construct($urls)
    {
        $this->_db = new DB_Class('test', 'username', 'password');

        // Split on any line-ending style so CRLF input does not leave a
        // stray "\r" on each URL, and drop blank lines instead of treating
        // them as (invalid) URLs.
        $this->_urlarray = array();
        $lines = preg_split('/\r\n|\r|\n/', (string) $urls);
        if ($lines === false) {
            $lines = array();
        }
        foreach ($lines as $line) {
            $line = trim($line);
            if ($line !== '') {
                $this->_urlarray[] = $line;
            }
        }
    }

    /**
     * Legacy PHP 4-style constructor name, kept so existing explicit calls
     * (for example parent::Harvest_Keywords($urls)) keep working. PHP 8 no
     * longer treats this method as a constructor, hence __construct() above.
     *
     * @param string $urls URLs separated by line breaks.
     */
    public function Harvest_Keywords($urls)
    {
        $this->__construct($urls);
    }

    /**
     * Normalises one raw token into a keyword.
     *
     * @param string $word Raw token from the page text.
     * @return string|false Lower-cased keyword, or false when the token is rejected.
     */
    public function _normalizeWord($word)
    {
        $word = strtolower($word);

        // Allow-listed words are kept exactly as written; stripping
        // punctuation from them would turn "c++" into "c".
        if (in_array($word, $this->_allowwords, true)) {
            return $word;
        }

        // Reject tokens containing anything other than letters, digits,
        // apostrophe, hyphen, '?' or '!'.
        if (preg_match("/[^a-z0-9'?!-]/", $word)) {
            return false;
        }

        // Strip '?' and '!' BEFORE the length and stop-word checks so that
        // "and!" or "ab!" cannot slip past them.
        $word = preg_replace("/[^a-z0-9'-]/", '', $word);

        if (strlen($word) < 3 || in_array($word, $this->_stopwords, true)) {
            return false;
        }

        return $word;
    }

    /**
     * Filters a single array element in place.
     *
     * A rejected element is removed from $array; an accepted one is replaced
     * by its normalised form, escaped with addslashes().
     *
     * $array is declared by reference here because call-time
     * pass-by-reference (the `&$words` at the call site) is a fatal error
     * since PHP 5.4. Do not use this method as an array_walk() callback:
     * array_walk() must not change the structure of the array it walks.
     *
     * @param string     $item  Element value (modified in place).
     * @param int|string $key   Key of the element inside $array.
     * @param array      $array Array the element belongs to.
     */
    public function _prune(&$item, $key, &$array)
    {
        $clean = $this->_normalizeWord($item);
        if ($clean === false) {
            $item = strtolower($item);
            unset($array[$key]);
        } else {
            $item = addslashes($clean);
        }
    }

    /**
     * Checks that a string looks like an http(s) URL containing a dot.
     *
     * @param string $url Candidate URL.
     * @return int|false 1 when it matches, 0 when it does not (preg_match() result).
     */
    public function _checkURL($url)
    {
        // Anchored at the start so text that merely contains a URL is rejected.
        return preg_match('#^https?://.+\..+#i', $url);
    }

    /**
     * Downloads a page and reduces it to plain text.
     *
     * @param string $url       Address to fetch.
     * @param int    $maxlength Maximum number of bytes to read.
     * @return string|false Tag-stripped text, or FALSE when the URL cannot be read.
     */
    public function _getData($url, $maxlength = 25000)
    {
        // fopen() raises a warning on failure; it is silenced because the
        // failure is reported explicitly just below.
        $filehandle = @fopen($url, 'r');
        if (!$filehandle) {
            echo 'Could not open URL (' . htmlspecialchars($url, ENT_QUOTES) . ").<br />\n";
            return false;
        }

        // A single fread() on a network stream may return only the first
        // packet; stream_get_contents() keeps reading up to $maxlength.
        $data = stream_get_contents($filehandle, $maxlength);
        fclose($filehandle);

        if ($data === false) {
            echo 'Could not open URL (' . htmlspecialchars($url, ENT_QUOTES) . ").<br />\n";
            return false;
        }

        return str_replace('&nbsp;', ' ', strip_tags($data));
    }

    /**
     * Harvests the keywords of one URL and replaces its stored keyword rows.
     *
     * @param string $url Address to harvest.
     */
    public function _harvest($url)
    {
        if (!$this->_checkURL($url)) {
            echo 'URL is not valid (' . htmlspecialchars($url, ENT_QUOTES) . ").<br />\n";
            return;
        }

        $data = $this->_getData($url);
        if ($data === false || $data === '') {
            return;
        }

        // Split on whitespace, commas and full stops, then keep only the
        // tokens that survive normalisation.
        $tokens = preg_split('/[\s,.]+/', $data, -1, PREG_SPLIT_NO_EMPTY);
        if ($tokens === false) {
            $tokens = array();
        }
        $keywords = array();
        foreach ($tokens as $token) {
            $clean = $this->_normalizeWord($token);
            if ($clean !== false) {
                $keywords[] = $clean;
            }
        }
        sort($keywords);

        // NOTE(review): DB_Class is not visible from this file, so values are
        // escaped with addslashes() as the original code did for keywords.
        // Prefer the driver's own escaping or prepared statements if DB_Class
        // offers them.
        $safeUrl = addslashes($url);
        $lookup = "SELECT id FROM urls WHERE url='$safeUrl'";

        $url_id = $this->_db->getone($lookup);
        if ($url_id) {
            $url_id = (int) $url_id;
            $this->_db->query("DELETE FROM keywords WHERE url_id=$url_id");
        } else {
            $this->_db->query("INSERT INTO urls SET url='$safeUrl'");
            // Read the new id back through DB_Class instead of calling
            // mysql_insert_id(), which was removed in PHP 7.
            $url_id = (int) $this->_db->getone($lookup);
            if (!$url_id) {
                echo 'Could not store URL (' . htmlspecialchars($url, ENT_QUOTES) . ").<br />\n";
                return;
            }
        }

        // Nothing survived filtering: skip the INSERT rather than storing an
        // empty keyword row.
        if (!$keywords) {
            return;
        }

        $rows = array();
        foreach ($keywords as $keyword) {
            $rows[] = "($url_id, '" . addslashes($keyword) . "')";
        }
        $this->_db->query('INSERT INTO keywords VALUES ' . implode(', ', $rows));
    }

    /**
     * Harvests every URL given to the constructor, in order.
     */
    public function process()
    {
        foreach ($this->_urlarray as $url) {
            $this->_harvest($url);
        }
    }
}
?> |
Next: >>
More Database Articles Articles More By Matt Wade |