diff --git a/htroot/api/push_p.html b/htroot/api/push_p.html
new file mode 100644
index 000000000..febf7eca3
--- /dev/null
+++ b/htroot/api/push_p.html
@@ -0,0 +1,76 @@
+
+
+
+
+ File Upload
+ #(mode)#
+
+ This form can be used to upload a file and assign it to a URL.
+ Example usage is the direct attachment of a content management system to YaCy to push newly changed files directly to the YaCy indexer.
+
+ ::
+
+ Result for the recently submitted file(s). You can also submit the same form using the servlet push_p.json to get push confirmations in JSON format.
+
+ - count
- #[count]#
+ - successall
- #(successall)#false::true#(/successall)#
+ - countsuccess
- #[countsuccess]#
+ - countfail
- #[countfail]#
+
+
+ Item | URL | Success | Message |
+ #{results}#
+
+ #[item]# |
+ #[url]# |
+ #(success)#fail::ok#(/success)# |
+ #(success)##[message]#::#[message]##(/success)# |
+
+ #{/results}#
+
+
+ If you want to push more files, use this form to pre-define the number of upload forms:
+
+
+ #(/mode)#
+
+
\ No newline at end of file
diff --git a/htroot/api/push_p.java b/htroot/api/push_p.java
new file mode 100644
index 000000000..6ad5a3867
--- /dev/null
+++ b/htroot/api/push_p.java
@@ -0,0 +1,134 @@
+/**
+ * push_p
+ * Copyright 2014 by Michael Peter Christen, mc@yacy.net, Frankfurt a. M., Germany
+ * First released 12.06.2014 at http://yacy.net
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.net.MalformedURLException;
+import java.util.Date;
+
+import net.yacy.cora.document.encoding.ASCII;
+import net.yacy.cora.document.encoding.UTF8;
+import net.yacy.cora.document.id.DigestURL;
+import net.yacy.cora.order.Base64Order;
+import net.yacy.cora.protocol.Domains;
+import net.yacy.cora.protocol.HeaderFramework;
+import net.yacy.cora.protocol.RequestHeader;
+import net.yacy.cora.protocol.ResponseHeader;
+import net.yacy.crawler.data.CrawlProfile;
+import net.yacy.crawler.retrieval.Request;
+import net.yacy.crawler.retrieval.Response;
+import net.yacy.search.IndexingQueueEntry;
+import net.yacy.search.Switchboard;
+import net.yacy.server.serverObjects;
+import net.yacy.server.serverSwitch;
+
+public class push_p {
+
+    // test: http://localhost:8090/api/push_p.json?count=1&synchronous=false&commit=false&url-0=http://nowhere.cc/example.txt&data-0=%22hello%20world%22&lastModified-0=Tue,%2015%20Nov%201994%2012:45:26%20GMT&contentType-0=text/plain&collection-0=testpush
+
+    /**
+     * Servlet entry point: accepts documents pushed by a client and hands them to the indexing queue.
+     *
+     * <p>Two modes are selected by the request:</p>
+     * <ul>
+     * <li><b>display mode</b> ({@code post == null} or parameter {@code c > 0}): only fills the
+     *     template properties needed to render {@code c} upload forms; nothing is indexed.</li>
+     * <li><b>push mode</b> (otherwise): for every index {@code i} in {@code [0, count)} the parameters
+     *     {@code url-i}, {@code collection-i}, {@code lastModified-i}, {@code contentType-i} and
+     *     {@code data-i} are read, an artificial request/response pair is built and enqueued in
+     *     {@code sb.indexingDocumentProcessor}. A per-item result (item, url, success, message) and
+     *     the totals (count, countsuccess, countfail, successall) are written to the returned properties.</li>
+     * </ul>
+     *
+     * <p>An item whose url cannot be parsed is reported as failed and does not stop the remaining items.
+     * A commit of the fulltext index is triggered only if both {@code synchronous} and {@code commit}
+     * are set.</p>
+     *
+     * @param header the http request header (not used)
+     * @param post   the request parameters, may be null
+     * @param env    the server environment; must be the {@link Switchboard}
+     * @return the template properties for push_p.html / push_p.json
+     */
+    public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env) {
+        final Switchboard sb = (Switchboard) env;
+        final serverObjects prop = new serverObjects();
+
+        // display mode: this only helps to display a nice input form for test cases
+        final int c = post == null ? 1 : post.getInt("c", 0);
+        if (c > 0) {
+            prop.put("mode", 0);
+            for (int i = 0; i < c; i++) prop.put("mode_input_" + i + "_count", i);
+            prop.put("mode_input", c);
+            prop.put("mode_count", c);
+            return prop;
+        }
+
+        // push mode: this does a document upload
+        prop.put("mode", 1);
+        // defensive only: a null post always yields c == 1 above and has already returned the input form
+        if (post == null) return prop;
+        final boolean synchronous = post.getBoolean("synchronous");
+        final boolean commit = post.getBoolean("commit");
+        final int count = post.getInt("count", 0);
+        boolean successall = true;
+        int countsuccess = 0;
+        int countfail = 0;
+        for (int i = 0; i < count; i++) {
+            // read the url outside of the try block so that the failure branch can report it
+            final String u = post.get("url-" + i, "");
+            prop.put("mode_results_" + i + "_item", i);
+            prop.put("mode_results_" + i + "_url", u);
+            try {
+                final DigestURL url = new DigestURL(u);
+                final String collection = post.get("collection-" + i, "");
+                final String lastModified = post.get("lastModified-" + i, ""); // must be in RFC1123 format
+                final String contentType = post.get("contentType-" + i, "");
+                final String data64 = post.get("data-" + i, ""); // file uploads are base64encoded in YaCyDefaultServlet.parseMultipart
+                byte[] data = Base64Order.standardCoder.decode(data64);
+                if ((data == null || data.length == 0) && data64.length() > 0) data = UTF8.getBytes(data64); // for test cases
+                // the check above leaves data == null when no payload was submitted and the decoder
+                // returned null; use an empty body so that data.length below cannot throw
+                if (data == null) data = new byte[0];
+
+                // create response header
+                final RequestHeader requestHeader = new RequestHeader();
+                final ResponseHeader responseHeader = new ResponseHeader(200);
+                responseHeader.put(HeaderFramework.LAST_MODIFIED, lastModified);
+                responseHeader.put(HeaderFramework.CONTENT_TYPE, contentType);
+                responseHeader.put(HeaderFramework.CONTENT_LENGTH, Long.toString(data.length));
+                final CrawlProfile profile = sb.crawler.getPushCrawlProfile(collection);
+
+                // create requests and artificial response
+                final Request request = new Request(
+                        ASCII.getBytes(sb.peers.mySeed().hash),
+                        url,
+                        null,             // referrer hash
+                        "",               // the name of the document to crawl
+                        new Date(),       // current date
+                        profile.handle(), // the name of the prefetch profile. This must not be null!
+                        0,                // depth the crawling depth of the entry
+                        0,                // anchors number of anchors of the parent
+                        0);               // forkfactor sum of anchors of all ancestors
+                final Response response = new Response(
+                        request,
+                        requestHeader,
+                        responseHeader,
+                        profile,
+                        false, // from cache?
+                        data); // content
+
+                // asynchronously push the content to the indexing queue
+                sb.indexingDocumentProcessor.enQueue(new IndexingQueueEntry(
+                        response,
+                        null,
+                        null));
+                prop.put("mode_results_" + i + "_success", "1");
+                prop.put("mode_results_" + i + "_success_message", "http://" + Domains.myPublicLocalIP().getHostAddress() + ":" + sb.getConfigInt("port", 8090) + "/solr/select?q=sku:%22" + u + "%22");
+                countsuccess++;
+            } catch (MalformedURLException e) {
+                // a bad url is a client input error: it is reported back in the response,
+                // so a log line is sufficient and no stack trace is dumped to stderr
+                sb.log.warn("push_p: item " + i + " rejected, malformed url '" + u + "': " + e.getMessage());
+                prop.put("mode_results_" + i + "_success", "0");
+                prop.put("mode_results_" + i + "_success_message", e.getMessage());
+                successall = false;
+                countfail++;
+            }
+        }
+        prop.put("mode_results", count);
+        prop.put("mode_successall", successall ? "1" : "0");
+        prop.put("mode_count", count);
+        prop.put("mode_countsuccess", countsuccess);
+        prop.put("mode_countfail", countfail);
+
+        // a commit is only done if the client asked for both, synchronous operation and a commit
+        if (synchronous && commit) sb.index.fulltext().commit(true);
+
+        return prop;
+    }
+
+}
diff --git a/htroot/api/push_p.json b/htroot/api/push_p.json
new file mode 100644
index 000000000..7b69b22e6
--- /dev/null
+++ b/htroot/api/push_p.json
@@ -0,0 +1,14 @@
+{#(mode)#::
+ "count":"#[count]#",
+ "successall": #(successall)#"false"::"true"#(/successall)#,
+#{results}#
+ "item-#[item]#":{
+ "item":"#[item]#",
+ "url":"#[url]#",
+ "success": #(success)#"false"::"true"#(/success)#,
+ "message": #(success)#"#[message]#"::"#[message]#"#(/success)#
+ },
+#{/results}#
+ "countsuccess":#[countsuccess]#,
+ "countfail":#[countfail]#
+#(/mode)#}
diff --git a/source/net/yacy/crawler/CrawlSwitchboard.java b/source/net/yacy/crawler/CrawlSwitchboard.java
index 85411379d..bfcb399e1 100644
--- a/source/net/yacy/crawler/CrawlSwitchboard.java
+++ b/source/net/yacy/crawler/CrawlSwitchboard.java
@@ -67,6 +67,7 @@ public final class CrawlSwitchboard {
public static final String CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA = "snippetLocalMedia";
public static final String CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA = "snippetGlobalMedia";
public static final String CRAWL_PROFILE_SURROGATE = "surrogates";
+ public static final String CRAWL_PROFILE_PUSH_STUB = "push_";
public static Set DEFAULT_PROFILES = new HashSet();
static {
@@ -96,12 +97,9 @@ public final class CrawlSwitchboard {
private final MapHeap profilesPassiveCrawls;
private final Map profilesActiveCrawlsCache; //TreeMap(Base64Order.enhancedCoder);
private final Map profilesActiveCrawlsCounter;
- public CrawlProfile defaultProxyProfile;
- public CrawlProfile defaultRemoteProfile;
- public CrawlProfile defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile;
- public CrawlProfile defaultTextGreedyLearningProfile;
- public CrawlProfile defaultMediaSnippetLocalProfile, defaultMediaSnippetGlobalProfile;
- public CrawlProfile defaultSurrogateProfile;
+ public CrawlProfile defaultProxyProfile, defaultRemoteProfile, defaultTextSnippetLocalProfile, defaultTextSnippetGlobalProfile;
+ public CrawlProfile defaultTextGreedyLearningProfile, defaultMediaSnippetLocalProfile, defaultMediaSnippetGlobalProfile, defaultSurrogateProfile;
+ private Map defaultPushProfiles; // for each collection one profile
private final File queuesRoot;
private Switchboard switchboard;
@@ -110,6 +108,7 @@ public final class CrawlSwitchboard {
this.switchboard = switchboard;
this.log = this.switchboard.log;
this.queuesRoot = this.switchboard.queuesRoot;
+ this.defaultPushProfiles = new ConcurrentHashMap<>();
this.log.info("Initializing Word Index for the network '" + networkName + "'.");
if ( networkName == null || networkName.isEmpty() ) {
@@ -493,7 +492,7 @@ public final class CrawlSwitchboard {
false,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE),
-1,
- true, true, true,
+ true, true, false,
true,
false,
false,
@@ -505,6 +504,38 @@ public final class CrawlSwitchboard {
UTF8.getBytes(this.defaultSurrogateProfile.handle()),
this.defaultSurrogateProfile);
}
+
+ /**
+  * Get the crawl profile that is used for documents pushed into the given collection.
+  * Profiles are kept in {@code defaultPushProfiles}, one per collection. If none exists
+  * yet for the collection, a new one named {@code CRAWL_PROFILE_PUSH_STUB + collection}
+  * is created, stored in {@code profilesActiveCrawls} under its handle and remembered
+  * for later calls.
+  *
+  * @param collection the name of the collection the pushed documents are assigned to
+  * @return the already known or the newly created push profile for that collection
+  */
+ public CrawlProfile getPushCrawlProfile(String collection) {
+     final CrawlProfile known = this.defaultPushProfiles.get(collection);
+     if (known == null) {
+         // first push into this collection: build the profile and register it
+         final String profileName = CRAWL_PROFILE_PUSH_STUB + collection;
+         final CrawlProfile created = new CrawlProfile(
+                 profileName,
+                 CrawlProfile.MATCH_ALL_STRING,   //crawlerUrlMustMatch
+                 CrawlProfile.MATCH_NEVER_STRING, //crawlerUrlMustNotMatch
+                 CrawlProfile.MATCH_ALL_STRING,   //crawlerIpMustMatch
+                 CrawlProfile.MATCH_NEVER_STRING, //crawlerIpMustNotMatch
+                 CrawlProfile.MATCH_NEVER_STRING, //crawlerCountryMustMatch
+                 CrawlProfile.MATCH_NEVER_STRING, //crawlerNoDepthLimitMatch
+                 CrawlProfile.MATCH_ALL_STRING,   //indexUrlMustMatch
+                 CrawlProfile.MATCH_NEVER_STRING, //indexUrlMustNotMatch
+                 CrawlProfile.MATCH_ALL_STRING,   //indexContentMustMatch
+                 CrawlProfile.MATCH_NEVER_STRING, //indexContentMustNotMatch
+                 0,
+                 false,
+                 System.currentTimeMillis(),
+                 -1,
+                 true, true, false,
+                 true,
+                 true,
+                 false,
+                 false,
+                 CacheStrategy.NOCACHE,
+                 collection,
+                 ClientIdentification.yacyIntranetCrawlerAgentName);
+         final byte[] handle = UTF8.getBytes(created.handle());
+         this.profilesActiveCrawls.put(handle, created);
+         this.defaultPushProfiles.put(collection, created);
+         return created;
+     }
+     return known;
+ }
private void resetProfiles() {
this.profilesActiveCrawlsCache.clear();
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index 28c567155..5677b6cf1 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -2069,7 +2069,6 @@ public final class Switchboard extends serverSwitch {
CrawlProfile selentry;
for ( final byte[] handle : this.crawler.getActive() ) {
selentry = this.crawler.getActive(handle);
- assert selentry.handle() != null : "profile.name = " + selentry.collectionName();
if ( selentry.handle() == null ) {
this.crawler.removeActive(handle);
continue;