". t("Search guidelines") .""; $output .= "

". t("The search page allows you to search the web site's content. You can specify multiple words, and they will all be searched for. You can also use wildcards, so 'walk*' will match 'walk', 'walking', 'walker', 'walkable' and so on. Furthermore, searches are not case sensitive so searching for 'walk', 'Walk' or 'WALK' will yield exactly the same results.") ."

"; $output .= "". t("Words excluded from the search") .""; $output .= "

". t("Words that frequently occur, typically called 'noise words', are ignored. Example words are 'a', 'at', 'and', 'are', 'as', 'how', 'where', etc. Words shorter than %number letters are also being filtered.", array("%number" => variable_get("minimum_word_size", 2))) ."

"; return $output; }

/**
 * Return module meta-information for the requested field.
 *
 * @param $field The key to look up; this function defines "description"
 *               and "admin_help".
 *
 * @return The translated text stored under $field.
 */
function search_system($field){
  $system["description"] = t("Enables site wide keyword searching.");
  $system["admin_help"] = t("The search engine works by keeping an index of \"interesting\" words. To make sure we only get \"interesting\" words you need to set the following.");

  return $system[$field];
}

/**
 * Return an array of valid search access permissions
 */
function search_perm() {
  return array("search content", "administer search");
}

/**
 * Return an array of links to be displayed
 *
 * @param $type The type of page requesting the link
 *
 * @return An array holding the "search" link when $type is "page" and the
 *         current user has the "search content" permission; otherwise an
 *         empty array.
 */
function search_link($type) {
  $links = array();

  if ($type == "page" && user_access("search content")) {
    $links[] = l(t("search"), "search", array("title" => t("Search for older content.")));
  }

  return $links;
}

/**
 * Build the settings form for the search module.
 *
 * The form exposes the variables "minimum_word_size", "remove_short",
 * "noisewords" and "help_pos", each pre-filled from variable_get().
 *
 * @return The concatenated form elements.
 */
function search_settings() {
  $output = form_textfield(t("Minimum word length to index"), "minimum_word_size", variable_get("minimum_word_size", 2), 10, 10, t("The number of characters a word has to be to be indexed. Words shorter than this will not be searchable."));
  $output .= form_textfield(t("Minimum word length to search for"), "remove_short", variable_get("remove_short", 0), 10, 10, t("The number of characters a word has to be to be searched for."));
  $output .= form_textarea(t("Noise words"), "noisewords", variable_get("noisewords", ""), 70, 10, t("These words will not be indexed, enter comma separated list, linebreaks and whitespace do not matter. Example: and, or, not, a, to, I, it, ..."));
  $output .= form_select(t("Help text position"), "help_pos", variable_get("help_pos", 1), array("1" => t("Above search output"), "2" => t("Below search output"), "3" => t("Link from above search output"), "4" => t("Link from below search output")), t("Where to show the help text for users on the search page."));

  return $output;
}

/**
 * search engine administration actions
 *
 * When the posted "op" is "reindex" and the user may "administer search",
 * the index is invalidated and then rebuilt right away, printing a short
 * progress message after each step.
 */
function search_admin() {
  // Guard the lookup so a request without "op" does not raise a notice.
  $op = isset($_POST["op"]) ? $_POST["op"] : NULL;

  // Only allow people with sufficient access.
  if (user_access("administer search")) {
    if ($op == "reindex") {
      search_invalidate();
      print t("index invalidated") ."
\n";
      search_cron();
      print t("index recreated") ."

\n";
    }
  }
}

/**
 * perform a regularly run action across all modules that have the
 * _update_index function in them.
 *
 * Each module's "update_index" hook is invoked; a non-empty return value is
 * handed to update_index().
 */
function search_cron() {
  foreach (module_list() as $module) {
    $module_array = module_invoke($module, "update_index");
    if ($module_array) {
      update_index($module_array);
    }
  }
}

/**
 * Perform a search on a word(s)
 *
 * Search function called by each node that supports the indexed search.
 * Every searched word must match an item for that item to be returned
 * (AND semantics); an item's "count" is the sum of the per-word counts.
 *
 * @param $search_array an array as returned from _search
 *                      of type array("keys" => ...,
 *                      "type" => ..., "select" => ...)
 *                      see node_search in node.module for an
 *                      explanation of array items
 *
 * @return An array of result arrays with the keys "count", "title", "link",
 *         "user", "date" and "keywords", ordered by descending count, or
 *         NULL when nothing matched.
 */
function do_search($search_array) {
  $keys = strtolower($search_array["keys"]);
  $type = $search_array["type"];
  $select = $search_array["select"];

  // Replace wildcards with mysql wildcards
  $keys = str_replace("*", "%", $keys);

  // Split the words entered into an array
  $words = explode(" ", $keys);

  $minimum_length = variable_get("remove_short", 0);

  $searched = array();   // the words that were actually queried
  $results = array();    // lno => accumulated row data
  $reduction = array();  // lno => array(word => true) for every word that hit
  $find = NULL;          // stays NULL when there is nothing to return

  foreach ($words as $word) {
    /*
    ** Skip empty words (produced by doubled or trailing spaces) and words
    ** that are too short.  Skipped words are deliberately kept out of
    ** $searched: requiring a match for a word that was never queried
    ** would throw away every result.
    */
    if (strlen($word) == 0 || strlen($word) < $minimum_length) {
      continue;
    }
    $searched[] = $word;

    /*
    ** Put the next search word into the query and do the query.  The word
    ** is user input, so it is escaped first; str_replace() is used rather
    ** than preg_replace() because the latter interprets "$n" and "\n"
    ** sequences in the replacement text.
    ** NOTE(review): addslashes() assumes the database accepts backslash
    ** escaping -- confirm against the database layer in use.
    */
    $query = str_replace("%", addslashes($word), $select);
    $result = db_query($query);

    // Create an in memory array of the results
    while ($row = db_fetch_array($result)) {
      $lno = $row["lno"];

      // Remember that this word matched this item.
      $reduction[$lno][$word] = true;

      if (!isset($results[$lno])) {
        // "count" has to stay the first key: array_multisort() below
        // compares the row arrays element by element.
        $results[$lno] = array(
          "count" => $row["count"],
          "lno" => $lno,
          "nid" => $row["nid"],
          "title" => $row["title"],
          "created" => $row["created"],
          "uid" => $row["uid"],
          "name" => $row["name"]);
      }
      else {
        /*
        ** Different word, but existing "lno", increase the count of
        ** matches against this "lno" by the number of times this
        ** word appears in the text
        */
        $results[$lno]["count"] += $row["count"];
      }
    }
  }

  // Keep only the items that were matched by every searched word.
  $fullresults = array();
  foreach ($results as $lno => $values) {
    $pass = true;
    foreach ($searched as $word) {
      if (!isset($reduction[$lno][$word])) {
        $pass = false;
        break;
      }
    }
    if ($pass) {
      $fullresults[$lno] = $values;
    }
  }

  if ($fullresults) {
    // Sort the rows in descending order (highest count first).
    array_multisort($fullresults, SORT_DESC);

    // Administrative pages link to the edit forms instead of the views.
    $admin = strstr(request_uri(), "admin");
    $keywords = implode("|", $words);

    // OK, time to output the results.
    foreach ($fullresults as $value) {
      $lno = $value["lno"];
      $nid = $value["nid"];

      switch ($type) {
        case "node":
          $find[] = array("count" => $value["count"], "title" => $value["title"], "link" => ($admin ? url("admin/node/edit/$lno") : url("node/view/$lno")), "user" => $value["name"], "date" => $value["created"], "keywords" => $keywords);
          break;
        case "comment":
          $find[] = array("count" => $value["count"], "title" => $value["title"], "link" => ($admin ? url("admin/comment/edit/$lno") : url("node/view/$nid#$lno")), "user" => $value["name"], "date" => $value["created"], "keywords" => $keywords);
          break;
      }
    }
  }

  return $find;
}

/**
 * Update the search_index table
 *
 * For every row returned by the "select" query the existing index entries
 * are deleted and the row's text is re-indexed: markup and punctuation are
 * stripped, the text is lower-cased and split on whitespace, noise words
 * and words shorter than "minimum_word_size" are dropped, and each
 * remaining word is stored together with its number of occurrences.
 *
 * @param $search_array an array as returned from _update_index
 *                      of type array("last_update" => ...,
 *                      "node_type" => ..., "select" => ...)
 *                      see node_update_index in node.module for an
 *                      explanation of array items
 *
 * @return Always true.
 */
function update_index($search_array) {
  $last_update = variable_get($search_array["last_update"], 1);
  $node_type = $search_array["node_type"];
  $select = $search_array["select"];
  $minimum_word_size = variable_get("minimum_word_size", 2);

  //watchdog("user", "$last_update $node_type $select");

  /*
  ** Build the noise word lookup once, outside the node loop.  The words
  ** are lower-cased because the text they are compared against is
  ** lower-cased too, and trimmed in case stray whitespace was stored.
  */
  $noise = array();
  foreach (explode(",", strtolower(variable_get("noisewords", ""))) as $word) {
    $word = trim($word);
    if (strlen($word)) {
      $noise[$word] = true;
    }
  }

  $result = db_query($select);

  // Look through the nodes we just selected
  while ($node = db_fetch_array($result)) {

    /*
    ** Trash any existing entries in the search index for this node,
    ** in case its a modified node.
    */
    db_query("DELETE from search_index where lno = '%s' and type = '%s'", $node["lno"], $node_type);

    /*
    ** Build the wordlist, teaser not included, as it then gives a
    ** false count of the number of hits, and doesn't show up
    ** when clicking on a node from the search interface anyway.
    */
    $wordlist = $node["text1"] ." ". $node["text2"];

    // Strip heaps of stuff out of it
    $wordlist = preg_replace("'<[\/\!]*?[^<>]*?>'si", "", $wordlist);

    // Remove punctuation and stuff
    $wordlist = preg_replace("'(\xBB|\xAB|!|\xA1|%|,|:|;|\(|\)|\&|\"|\'|\.|-|\/|\?|\\\)'", "", $wordlist);

    // Strip out (now mangled) http and tags.
    $wordlist = preg_replace("'http\w+'", "", $wordlist);
    $wordlist = preg_replace("'www\w+'", "", $wordlist);

    // Remove all newlines of any type
    $wordlist = preg_replace("'([\r\n]|[\r]|[\n])'", " ", $wordlist);

    // Lower case the whole thing.
    $wordlist = strtolower($wordlist);

    // Remove whitespace
    $wordlist = preg_replace("'[\s]+'", " ", $wordlist);

    /*
    ** walk through the words, giving a "weight" to each word, based on
    ** the number of times it appears in a page.  Noise words are dropped
    ** here, per word, which also catches them at the very start or end of
    ** the text and when several of them follow each other.
    */
    $newwords = array();
    foreach (explode(" ", $wordlist) as $word) {
      if (strlen($word) >= $minimum_word_size && !isset($noise[$word])) {
        if (isset($newwords[$word])) {
          $newwords[$word]++;
        }
        else {
          $newwords[$word] = 1;
        }
      }
    }

    /*
    ** Walk through the weighted words array, inserting them into
    ** the search index
    */
    foreach ($newwords as $key => $value) {
      db_query("INSERT INTO search_index VALUES('%s', %d, '%s', %d)", $key, $node["lno"], $node_type, $value);
    }
  }

  // update the last time this process was run.
  variable_set($search_array["last_update"], time());

  return true;
}

/**
 * Mark the index of every indexing module as out of date by resetting the
 * "last_update" variable named by the module's update_index hook to 1.
 */
function search_invalidate() {
  foreach (module_list() as $module) {
    $module_array = module_invoke($module, "update_index");
    if ($module_array) {
      variable_set($module_array["last_update"], 1);
    }
  }
}

/**
 * Save the values entered by the administrator for the search module
 *
 * @param $edit An array of fields as setup via calling form_textfield,
 *              form_textarea etc
 */
function search_save($edit) {
  variable_set("minimum_word_size", $edit["minimum_word_size"]);

  /*
  ** Strip all whitespace from the noise word list.  The replacement string
  ** needs one character per source character: strtr() ignores the extra
  ** characters of the longer argument, so a single " " would translate
  ** "\n" only and leave "\r" and "\t" inside the stored words.
  */
  $data = strtr($edit["noisewords"], "\n\r\t", "   ");
  $data = str_replace(" ", "", $data);
  variable_set("noisewords", $data);

  variable_set("help_pos", $edit["help_pos"]);
  variable_set("remove_short", $edit["remove_short"]);
}

/**
 * Render the search page: the search form, the help text (or a link to
 * it, depending on the "help_pos" setting) and, when keys were given, the
 * results.
 *
 * @param $keys The search keys; the result box is only shown when this is
 *              non-empty.
 */
function search_view($keys = NULL) {
  if (user_access("search content")) {
    // Construct the search form:
    $form = search_form(NULL, NULL, TRUE);

    // Collect the search results:
    $output = search_data();

    // Display form and search results:
    $help_link = l(t("search help"), "search/help");
    switch (variable_get("help_pos", 1)) {
      case "1":
        $form = search_help(). $form;
        break;
      case "2":
        $form .= search_help();
        break;
      case "3":
        $form = $help_link. $form;
        break;
      case "4":
        $form .= $help_link;
    }

    theme("header");

    if ($form) {
      theme("box", t("Search"), $form);
    }

    if ($keys) {
      if ($output) {
        theme("box", t("Result"), $output);
      }
      else {
        theme("box", t("Result"), t("Your search yielded no results."));
      }
    }

    theme("footer");
  }
  else {
    theme("header");
    theme("box", t("Access denied"), message_access());
    theme("footer");
  }
}

/**
 * Page callback: show the help page for "search/help", the search page
 * for anything else.  The keys are taken from the query string when
 * present, from the posted form otherwise.
 */
function search_page() {
  $keys = isset($_GET["keys"]) ? $_GET["keys"] : $_POST["keys"];

  switch (arg(1)) {
    case "help":
      theme("header");
      theme("box", t("Search Help"), search_help());
      theme("footer");
      break;
    default:
      search_view($keys);
  }
}
?>