summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- boost.module 43
1 files changed, 33 insertions, 10 deletions
diff --git a/boost.module b/boost.module
index e26aea3..86d6214 100644
--- a/boost.module
+++ b/boost.module
@@ -2087,6 +2087,7 @@ function boost_crawler_init() {
variable_set('boost_crawler_loaded_count' . BOOST_FILE_EXTENSION, 0);
variable_set('boost_crawler_loaded_count' . BOOST_XML_EXTENSION, 0);
variable_set('boost_crawler_loaded_count' . '.js', 0);
+ variable_set('boost_crawler_loaded_count_alias', 0);
variable_set('boost_crawler_threads_primed', FALSE);
variable_set('boost_crawler_stopped', FALSE);
boost_async_call_crawler($self);
@@ -2183,15 +2184,37 @@ function boost_crawler_count($push_setting, $extension) {
* Logic to get boost_crawler table ready.
*/
function boost_crawler_init_tables() {
- if (boost_crawler_add_to_table(BOOST_PUSH_HTML, BOOST_FILE_EXTENSION)) {
- if (boost_crawler_add_to_table(BOOST_PUSH_XML, BOOST_XML_EXTENSION)) {
- if (boost_crawler_add_to_table(BOOST_PUSH_JSON, '.js')) {
- // All URL's added to boost_cralwer table; start hitting URL's
- return TRUE;
- }
- else {return FALSE;}
- }
- else {return FALSE;}
+ if ( boost_crawler_add_to_table(BOOST_PUSH_HTML, BOOST_FILE_EXTENSION)
+ && boost_crawler_add_to_table(BOOST_PUSH_XML, BOOST_XML_EXTENSION)
+ && boost_crawler_add_to_table(BOOST_PUSH_JSON, '.js')
+ && boost_crawler_add_alias_to_table()
+ ) {
+ // All URLs added to the boost_crawler table; start hitting URLs.
+ return TRUE;
+ }
+ else {
+ return FALSE;
}
- else {return FALSE;}
}
+
+/**
+ * Queue one batch of {url_alias} paths into {boost_crawler}; returns TRUE
+ * once no aliases remain, FALSE after queuing a batch (more may remain).
+ */
+function boost_crawler_add_alias_to_table() {
+  global $base_url;
+  $count = 1000;
+  // db_query() returns a result resource; db_result() extracts the count.
+  $total = (int) db_result(db_query("SELECT COUNT(*) FROM {url_alias}"));
+  $loaded = variable_get('boost_crawler_loaded_count_alias', 0);
+  if ($total > $loaded) {
+    // ORDER BY pid keeps the offset-based batches stable between calls.
+    $list = db_query_range("SELECT dst FROM {url_alias} ORDER BY pid", $loaded, $count);
+    while ($url = db_fetch_array($list)) {
+      @db_query("INSERT INTO {boost_crawler} (url) VALUES ('%s')", $base_url . '/' . $url['dst']);
+    }
+    variable_set('boost_crawler_loaded_count_alias', $loaded + $count);
+    return FALSE;
+  }
+  return TRUE;
+}
\ No newline at end of file