#(showload)#Available after successful loading of rss feed in simulation mode::
- not yet implemented THIS INTERFACE IS A STUB - DEVELOPMENT IS ONGOING
+
Indexing
+
#(showload)#Available after successful loading of rss feed in preview::
+
+
+
+
once
+
load this feed once now
+
scheduled
+
repeat the feed loading every
+
+ automatically.
+
+
#(/showload)#
@@ -49,7 +70,7 @@
#(showitems)#::
#(/showitems)#
diff --git a/htroot/Load_RSS_p.java b/htroot/Load_RSS_p.java
index 1f02d8f30..57b7d2950 100644
--- a/htroot/Load_RSS_p.java
+++ b/htroot/Load_RSS_p.java
@@ -21,23 +21,33 @@
import java.io.IOException;
import java.net.MalformedURLException;
import java.text.DateFormat;
+import java.util.Date;
+import java.util.Map;
import net.yacy.cora.document.Hit;
import net.yacy.cora.document.RSSFeed;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.RSSReader;
import net.yacy.cora.protocol.RequestHeader;
+import net.yacy.cora.storage.ARC;
+import net.yacy.cora.storage.ComparableARC;
+import net.yacy.document.Parser.Failure;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
+import net.yacy.kelondro.order.Base64Order;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.retrieval.Response;
+import de.anomic.data.WorkTables;
+import de.anomic.search.Segments;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
public class Load_RSS_p {
+ private static final ARC indexTriggered = new ComparableARC(1000, Base64Order.enhancedCoder);
+
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
final serverObjects prop = new serverObjects();
@@ -51,11 +61,17 @@ public class Load_RSS_p {
prop.put("url", post.get("url", ""));
+ int repeat_time = Integer.parseInt(post.get("repeat_time", "-1")); // defaults to -1 when the parameter is absent; NOTE(review): Integer.parseInt throws NumberFormatException on non-numeric input and no handler is visible here
+ final String repeat_unit = post.get("repeat_unit", "seldays"); // expected values: selminutes, selhours, seldays; defaults to seldays
+ if (!post.get("repeat", "off").equals("on") && repeat_time > 0) repeat_time = -1; // a positive interval is discarded unless repeat=on was posted
+
+ boolean record_api = false;
+
DigestURI url = null;
try {
url = post.containsKey("url") ? new DigestURI(post.get("url", ""), null) : null;
} catch (MalformedURLException e) {
- Log.logException(e);
+ Log.logWarning("Load_RSS_p", "url not well-formed: '" + post.get("url", "") + "'");
}
// if we have an url then try to load the rss
@@ -69,6 +85,54 @@ public class Load_RSS_p {
Log.logException(e);
}
+ // index all selected items: description only; selected items are the post entries whose value starts with "mark_"
+ if (rss != null && post.containsKey("indexSelectedItemContent")) {
+ RSSFeed feed = rss.getFeed();
+ loop: for (Map.Entry entry: post.entrySet()) {
+ if (entry.getValue().startsWith("mark_")) try {
+ RSSMessage message = feed.getMessage(entry.getValue().substring(5)); // substring(5) drops the "mark_" prefix; NOTE(review): confirm getMessage cannot return null for an unknown key, otherwise getLink() below fails
+ DigestURI messageurl = new DigestURI(message.getLink());
+ if (indexTriggered.containsKey(messageurl.hash())) continue loop; // indexing of this url was already triggered from this servlet
+ if (sb.urlExists(Segments.Process.LOCALCRAWLING, messageurl.hash()) != null) continue loop; // urlExists returned non-null: skip, the url is already known
+ sb.addToIndex(messageurl, null, null);
+ indexTriggered.put(messageurl.hash(), new Date()); // remember the trigger time so the item is not submitted again
+ } catch (IOException e) {
+ Log.logException(e); // log and continue with the next entry
+ } catch (Failure e) {
+ Log.logException(e); // log and continue with the next entry
+ }
+ }
+ }
+ if (rss != null && post.containsKey("indexAllItemContent")) { // index every item of the loaded feed
+ record_api = true; // in the visible code this is the only action that gets recorded as an API call
+ RSSFeed feed = rss.getFeed();
+ loop: for (RSSMessage message: feed) {
+ try {
+ DigestURI messageurl = new DigestURI(message.getLink());
+ if (indexTriggered.containsKey(messageurl.hash()) && post.containsKey("indexSelectedItemContent")) continue loop; // NOTE(review): the trigger cache only causes a skip when indexSelectedItemContent is posted as well; confirm this is intended
+ if (sb.urlExists(Segments.Process.LOCALCRAWLING, messageurl.hash()) != null) continue loop; // urlExists returned non-null: skip, the url is already known
+ sb.addToIndex(messageurl, null, null);
+ indexTriggered.put(messageurl.hash(), new Date()); // remember the trigger time for this url hash
+ } catch (IOException e) {
+ Log.logException(e); // log and continue with the next message
+ } catch (Failure e) {
+ Log.logException(e); // log and continue with the next message
+ }
+ }
+ }
+
+ if (record_api) {
+ // record API action in the work tables, labelled "import feed <normalized url>"
+ if (repeat_time > 0) {
+ // store as scheduled api call; substring(3) drops the "sel" prefix of repeat_unit (e.g. "seldays" -> "days")
+ sb.tables.recordAPICall(post, "Load_RSS_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "import feed " + url.toNormalform(true, false), repeat_time, repeat_unit.substring(3));
+ } else {
+ // no positive repeat interval: store just a protocol entry without scheduling arguments
+ sb.tables.recordAPICall(post, "Load_RSS_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "import feed " + url.toNormalform(true, false));
+ }
+ }
+
+ // show items from rss
if (rss != null) {
prop.put("showitems", 1);
RSSFeed feed = rss.getFeed();
@@ -76,27 +140,30 @@ public class Load_RSS_p {
prop.putHTML("showitems_title", channel.getTitle());
String author = channel.getAuthor();
if (author == null || author.length() == 0) author = channel.getCopyright();
+ Date pubDate = channel.getPubDate();
prop.putHTML("showitems_author", author == null ? "" : author);
prop.putHTML("showitems_description", channel.getDescription());
prop.putHTML("showitems_language", channel.getLanguage());
- prop.putHTML("showitems_date", DateFormat.getDateTimeInstance().format(channel.getPubDate()));
+ prop.putHTML("showitems_date", (pubDate == null) ? "" : DateFormat.getDateTimeInstance().format(pubDate));
prop.putHTML("showitems_ttl", channel.getTTL());
prop.putHTML("showitems_docs", channel.getDocs());
int i = 0;
for (final Hit item: feed) {
try {
- url = new DigestURI(item.getLink(), null);
+ DigestURI messageurl = new DigestURI(item.getLink(), null);
author = item.getAuthor();
if (author == null) author = item.getCopyright();
+ pubDate = item.getPubDate();
prop.put("showitems_item_" + i + "_count", i);
- prop.putHTML("showitems_item_" + i + "_hash", new String(url.hash()));
+ prop.put("showitems_item_" + i + "_state", sb.urlExists(Segments.Process.LOCALCRAWLING, messageurl.hash()) != null ? 2 : indexTriggered.containsKey(messageurl.hash()) ? 1 : 0);
+ prop.putHTML("showitems_item_" + i + "_guid", item.getGuid());
prop.putHTML("showitems_item_" + i + "_author", author == null ? "" : author);
prop.putHTML("showitems_item_" + i + "_title", item.getTitle());
- prop.putHTML("showitems_item_" + i + "_link", url.toNormalform(false, false));
+ prop.putHTML("showitems_item_" + i + "_link", messageurl.toNormalform(false, false));
prop.putHTML("showitems_item_" + i + "_description", item.getDescription());
prop.putHTML("showitems_item_" + i + "_language", item.getLanguage());
- prop.putHTML("showitems_item_" + i + "_date", DateFormat.getDateTimeInstance().format(item.getPubDate()));
+ prop.putHTML("showitems_item_" + i + "_date", (pubDate == null) ? "" : DateFormat.getDateTimeInstance().format(pubDate));
i++;
} catch (MalformedURLException e) {
Log.logException(e);
@@ -105,7 +172,11 @@ public class Load_RSS_p {
}
prop.put("showitems_item", i);
prop.put("showitems_num", i);
- if (i > 0) prop.put("showload", 1);
+ prop.putHTML("showitems_rss", url.toNormalform(true, false));
+ if (i > 0) {
+ prop.put("showload", 1);
+ prop.put("showload_rss", url.toNormalform(true, false));
+ }
}
return prop;
diff --git a/htroot/Tables_p.html b/htroot/Tables_p.html
index f8794c16f..a2ae0c488 100644
--- a/htroot/Tables_p.html
+++ b/htroot/Tables_p.html
@@ -28,7 +28,7 @@
#%env/templates/header.template%#
#%env/templates/submenuConfig.template%#
-
+ #(showselection)#::