From 97f6089a41a4ed40aef84f692690e30f50585f5d Mon Sep 17 00:00:00 2001
From: Michael Peter Christen
Date: Mon, 1 Dec 2014 15:03:09 +0100
Subject: [PATCH] YaCy can now create web page snapshots as pdf documents
 which can later be transcoded into jpg for image previews.

To create such pdfs you must first add wkhtmltopdf and imagemagick to your OS:
On a Mac, download wkhtmltox-0.12.1_osx-cocoa-x86-64.pkg from
http://wkhtmltopdf.org/downloads.html and download
http://cactuslab.com/imagemagick/assets/ImageMagick-6.8.9-9.pkg.zip
On Debian, run "apt-get install wkhtmltopdf imagemagick"

Then, in /Settings_p.html?page=ProxyAccess, check "Transparent Proxy" and
"Always Fresh" - these are used by wkhtmltopdf to fetch web pages through the
YaCy proxy. With "Always Fresh" all pages can be served from the proxy cache.

Finally, you will see a new option when starting an expert web crawl: a
maximum crawl depth up to which pdf snapshots are generated. The resulting
pdfs are then available in
DATA/HTCACHE/SNAPSHOTS/<host>.<port>/<depth>/<shard>/<urlhash>.<date>.pdf
---
 htroot/CrawlStartExpert.html                  | 22 +++++++++++++
 htroot/CrawlStartExpert.java                  | 12 ++++++-
 htroot/Crawler_p.java                         |  8 ++++-
 htroot/QuickCrawlLink_p.java                  |  2 +-
 source/net/yacy/cora/util/Html2Image.java     | 10 ++----
 source/net/yacy/crawler/CrawlSwitchboard.java | 18 +++++------
 .../net/yacy/crawler/data/CrawlProfile.java   | 19 +++++++++---
 source/net/yacy/crawler/data/Snapshots.java   | 31 ++++++++++++-------
 .../yacy/crawler/retrieval/HTTPLoader.java    |  9 ++++--
 .../net/yacy/data/ymark/YMarkCrawlStart.java  |  2 +-
 source/net/yacy/http/ProxyCacheHandler.java   |  2 +-
 source/net/yacy/search/Switchboard.java       |  4 +--
 12 files changed, 97 insertions(+), 42 deletions(-)

diff --git a/htroot/CrawlStartExpert.html b/htroot/CrawlStartExpert.html
index 6ae89a7a1..c05569c73 100644
--- a/htroot/CrawlStartExpert.html
+++ b/htroot/CrawlStartExpert.html
@@ -460,6 +460,28 @@
 #(/agentSelect)#
+ #(snapshotSelect)#::
+
+ Snapshot Creation
+
+
+
+ info
+ Snapshots are pictures of web pages that can be created during crawl time. They are first stored as pdf in subdirectories
+ of HTCACHE/SNAPSHOTS/ and are later converted from the pdfs to jpg. Snapshot generation can be controlled using a depth parameter; that
+ means a snapshot is only generated if the crawl depth of a document is less than or equal to the number given here. If the number is set to -1,
+ no snapshots are generated.
+
+
+
+
+ replace old snapshots with the new one&nbsp;&nbsp;&nbsp;
+ add new versions for each crawl
+
+
+
+ #(/snapshotSelect)#
Index Administration
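
Note on the template switch above: the #(snapshotSelect)# block is only rendered when both proxy flags are set and both external tools are installed, as the next CrawlStartExpert.java hunk shows. Below is a minimal standalone sketch of such an availability probe; the Debian binary paths appear in the Html2Image diff further down, while the Mac paths here are assumptions for illustration only.

import java.io.File;

public class SnapshotFeatureGate {

    private static final File WKHTMLTOPDF_DEBIAN = new File("/usr/bin/wkhtmltopdf");
    private static final File WKHTMLTOPDF_MAC = new File("/usr/local/bin/wkhtmltopdf"); // assumed location
    private static final File CONVERT_DEBIAN = new File("/usr/bin/convert");
    private static final File CONVERT_MAC = new File("/opt/local/bin/convert"); // assumed location

    /** true only if both external tools can be found on this host */
    public static boolean toolsAvailable() {
        return (WKHTMLTOPDF_DEBIAN.exists() || WKHTMLTOPDF_MAC.exists())
            && (CONVERT_DEBIAN.exists() || CONVERT_MAC.exists());
    }

    public static void main(String[] args) {
        // combined with the two proxy flags, a check like this drives prop.put("snapshotSelect", 0|1)
        System.out.println("snapshot tools available: " + toolsAvailable());
    }
}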
diff --git a/htroot/CrawlStartExpert.java b/htroot/CrawlStartExpert.java
index b3f2a6640..425ba8b89 100644
--- a/htroot/CrawlStartExpert.java
+++ b/htroot/CrawlStartExpert.java
@@ -29,6 +29,7 @@ import java.util.List;
 
 import net.yacy.cora.protocol.ClientIdentification;
 import net.yacy.cora.protocol.RequestHeader;
+import net.yacy.cora.util.Html2Image;
 import net.yacy.crawler.data.CrawlProfile;
 import net.yacy.search.Switchboard;
 import net.yacy.search.schema.CollectionSchema;
@@ -511,6 +512,15 @@ public class CrawlStartExpert {
                     ClientIdentification.yacyInternetCrawlerAgentName);
         }
 
+        // ---------- Snapshot generation
+        if (sb.getConfigBool("isTransparentProxy", false) &&
+            sb.getConfigBool("proxyAlwaysFresh", false) &&
+            Html2Image.wkhtmltopdfAvailable() && Html2Image.convertAvailable()) {
+            prop.put("snapshotSelect", 1);
+        } else {
+            prop.put("snapshotSelect", 0);
+        }
+
         // ---------- Index Administration
         // Do Local Indexing
         if (post == null) {
@@ -548,7 +558,7 @@ public class CrawlStartExpert {
             prop.put("collection", collectionEnabled ? defaultCollection : "");
         }
     }
-    
+
     // return rewrite properties
     return prop;
 }

diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java
index 5fc6e2d4d..b9e44265a 100644
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@@ -436,6 +436,11 @@ public class Crawler_p {
             // check crawlurl was given in sitecrawl
             if ("url".equals(crawlingMode) && rootURLs.size() == 0) hasCrawlstartDataOK = false;
         }
+
+        String snapshotsMaxDepthString = post.get("snapshotsMaxDepth", "-1");
+        int snapshotsMaxDepth = Integer.parseInt(snapshotsMaxDepthString);
+        boolean snapshotsReplaceOld = post.getBoolean("snapshotsReplaceOld");
+
         // prepare a new crawling profile
         final CrawlProfile profile;
         byte[] handle;
@@ -462,7 +467,8 @@ public class Crawler_p {
                     indexMedia,
                     storeHTCache,
                     crawlOrder,
-                    -1, // temporary; stub commit
+                    snapshotsMaxDepth,
+                    snapshotsReplaceOld,
                     cachePolicy,
                     collection,
                     agentName);

diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java
index dfad2b5a2..7f288c821 100644
--- a/htroot/QuickCrawlLink_p.java
+++ b/htroot/QuickCrawlLink_p.java
@@ -152,7 +152,7 @@ public class QuickCrawlLink_p {
                 obeyHtmlRobotsNoindex, obeyHtmlRobotsNofollow,
                 indexText, indexMedia,
                 storeHTCache, remoteIndexing,
-                -1,
+                -1, true,
                 CacheStrategy.IFFRESH,
                 collection,
                 ClientIdentification.yacyIntranetCrawlerAgentName);

diff --git a/source/net/yacy/cora/util/Html2Image.java b/source/net/yacy/cora/util/Html2Image.java
index 3088e604a..a5e459632 100644
--- a/source/net/yacy/cora/util/Html2Image.java
+++ b/source/net/yacy/cora/util/Html2Image.java
@@ -37,12 +37,8 @@ import java.awt.Graphics;
 import java.awt.image.BufferedImage;
 import java.beans.PropertyChangeEvent;
 import java.beans.PropertyChangeListener;
-import java.io.BufferedReader;
 import java.io.File;
 import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.List;
 
 public class Html2Image {
 
@@ -58,11 +54,11 @@ public class Html2Image {
     private final static File convertDebian = new File("/usr/bin/convert");
 
-    public boolean wkhtmltopdfAvailable() {
+    public static boolean wkhtmltopdfAvailable() {
         return wkhtmltopdfMac.exists() || wkhtmltopdfDebian.exists();
     }
 
-    public boolean convertAvailable() {
+    public static boolean convertAvailable() {
         return convertMac.exists() || convertDebian.exists();
     }
 
@@ -77,7 +73,7 @@ public class Html2Image {
         final File wkhtmltopdf = wkhtmltopdfMac.exists() ? wkhtmltopdfMac : wkhtmltopdfDebian;
         try {
-            OS.execSynchronous(wkhtmltopdf.getAbsolutePath() + (proxy == null ? " " : " --proxy " + proxy + " ") + url + " " + destination.getAbsolutePath());
+            OS.execSynchronous(wkhtmltopdf.getAbsolutePath() + " --title " + url + (proxy == null ? " " : " --proxy " + proxy + " ") + url + " " + destination.getAbsolutePath());
             return destination.exists();
         } catch (IOException e) {
             e.printStackTrace();
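
The writeWkhtmltopdf change above shells out with a concatenated command string via YaCy's own OS.execSynchronous helper. As a hedged standalone sketch, the same invocation can be expressed with ProcessBuilder, which passes the URL, proxy, and destination as discrete arguments; it only assumes a wkhtmltopdf binary on the PATH and is not the code the patch installs.

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class PdfSnapshot {

    public static boolean writePdf(String url, String proxy, File destination)
            throws IOException, InterruptedException {
        List<String> cmd = new ArrayList<>();
        cmd.add("wkhtmltopdf");
        cmd.add("--title");
        cmd.add(url);                      // embed the source URL as the pdf title, as the patch does
        if (proxy != null) {
            cmd.add("--proxy");
            cmd.add(proxy);                // e.g. "http://127.0.0.1:8090"
        }
        cmd.add(url);
        cmd.add(destination.getAbsolutePath());
        Process p = new ProcessBuilder(cmd).inheritIO().start();
        return p.waitFor() == 0 && destination.exists();
    }

    public static void main(String[] args) throws Exception {
        // hypothetical values: YaCy's transparent proxy on localhost and an example target URL
        boolean ok = writePdf("http://example.org/", "http://127.0.0.1:8090", new File("snapshot.pdf"));
        System.out.println("pdf written: " + ok);
    }
}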
diff --git a/source/net/yacy/crawler/CrawlSwitchboard.java b/source/net/yacy/crawler/CrawlSwitchboard.java
index 7fb2af71e..11acbcf92 100644
--- a/source/net/yacy/crawler/CrawlSwitchboard.java
+++ b/source/net/yacy/crawler/CrawlSwitchboard.java
@@ -293,7 +293,7 @@ public final class CrawlSwitchboard {
                 sb.getConfigBool(SwitchboardConstants.PROXY_INDEXING_LOCAL_MEDIA, true),
                 true,
                 sb.getConfigBool(SwitchboardConstants.PROXY_INDEXING_REMOTE, false),
-                -1,
+                -1, true,
                 CacheStrategy.IFFRESH,
                 "robot_" + CRAWL_PROFILE_PROXY,
                 ClientIdentification.yacyProxyAgentName);
@@ -323,7 +323,7 @@ public final class CrawlSwitchboard {
                 true,
                 false,
                 false,
-                -1,
+                -1, true,
                 CacheStrategy.IFFRESH,
                 "robot_" + CRAWL_PROFILE_REMOTE,
                 ClientIdentification.yacyInternetCrawlerAgentName);
@@ -353,7 +353,7 @@ public final class CrawlSwitchboard {
                 false,
                 true,
                 false,
-                -1,
+                -1, true,
                 CacheStrategy.IFEXIST,
                 "robot_" + CRAWL_PROFILE_SNIPPET_LOCAL_TEXT,
                 ClientIdentification.yacyIntranetCrawlerAgentName);
@@ -383,7 +383,7 @@ public final class CrawlSwitchboard {
                 true,
                 true,
                 false,
-                -1,
+                -1, true,
                 CacheStrategy.IFEXIST,
                 "robot_" + CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT,
                 ClientIdentification.yacyIntranetCrawlerAgentName);
@@ -414,7 +414,7 @@ public final class CrawlSwitchboard {
                 false,
                 true,
                 false,
-                -1,
+                -1, true,
                 CacheStrategy.IFEXIST,
                 "robot_" + CRAWL_PROFILE_GREEDY_LEARNING_TEXT,
                 ClientIdentification.browserAgentName);
@@ -444,7 +444,7 @@ public final class CrawlSwitchboard {
                 false,
                 true,
                 false,
-                -1,
+                -1, true,
                 CacheStrategy.IFEXIST,
                 "robot_" + CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA,
                 ClientIdentification.yacyIntranetCrawlerAgentName);
@@ -474,7 +474,7 @@ public final class CrawlSwitchboard {
                 true,
                 true,
                 false,
-                -1,
+                -1, true,
                 CacheStrategy.IFEXIST,
                 "robot_" + CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA,
                 ClientIdentification.yacyIntranetCrawlerAgentName);
@@ -504,7 +504,7 @@ public final class CrawlSwitchboard {
                 false,
                 false,
                 false,
-                -1,
+                -1, true,
                 CacheStrategy.NOCACHE,
                 "robot_" + CRAWL_PROFILE_SURROGATE,
                 ClientIdentification.yacyIntranetCrawlerAgentName);
@@ -537,7 +537,7 @@ public final class CrawlSwitchboard {
                 true,
                 false,
                 false,
-                -1,
+                -1, true,
                 CacheStrategy.NOCACHE,
                 collection,
                 ClientIdentification.yacyIntranetCrawlerAgentName);
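
All of the default profiles above receive the same two new constructor arguments: -1 (snapshots disabled) and true (replace old snapshots). Named constants for these literals, purely as a readability aid; YaCy itself passes the literals directly, so the names below are illustrative only.

public final class SnapshotDefaults {
    /** a maximum depth of -1 turns snapshot generation off for a profile */
    public static final int SNAPSHOTS_OFF = -1;
    /** keep only the newest snapshot when one is generated */
    public static final boolean REPLACE_OLD = true;
}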
diff --git a/source/net/yacy/crawler/data/CrawlProfile.java b/source/net/yacy/crawler/data/CrawlProfile.java
index c44cd6fea..e4ae27b39 100644
--- a/source/net/yacy/crawler/data/CrawlProfile.java
+++ b/source/net/yacy/crawler/data/CrawlProfile.java
@@ -86,7 +86,8 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
     public static final String INDEXING_URL_MUSTNOTMATCH = "indexURLMustNotMatch";
     public static final String INDEXING_CONTENT_MUSTMATCH = "indexContentMustMatch";
     public static final String INDEXING_CONTENT_MUSTNOTMATCH = "indexContentMustNotMatch";
-    public static final String LOADPREVIEWMAXDEPTH = "loadpreviewmaxdepth"; // if previews shall be loaded, this is positive and denotes the maximum depth; if not this is -1
+    public static final String SNAPSHOTS_MAXDEPTH = "snapshotsMaxDepth"; // if previews shall be loaded, this is positive and denotes the maximum depth; if not, this is -1
+    public static final String SNAPSHOTS_REPLACEOLD = "snapshotsReplaceOld"; // if set to true, only one version of a snapshot per day is stored; otherwise we also store different versions per day
 
     private Pattern crawlerurlmustmatch = null, crawlerurlmustnotmatch = null;
     private Pattern crawleripmustmatch = null, crawleripmustnotmatch = null;
@@ -142,7 +143,8 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
                         final boolean indexMedia,
                         final boolean storeHTCache,
                         final boolean remoteIndexing,
-                        final int loadPreviewMaxdepth,
+                        final int snapshotsMaxDepth,
+                        final boolean snapshotsReplaceOld,
                         final CacheStrategy cacheStrategy,
                         final String collections,
                         final String userAgentName) {
@@ -178,7 +180,8 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
         put(INDEX_MEDIA, indexMedia);
         put(STORE_HTCACHE, storeHTCache);
         put(REMOTE_INDEXING, remoteIndexing);
-        put(LOADPREVIEWMAXDEPTH, loadPreviewMaxdepth);
+        put(SNAPSHOTS_MAXDEPTH, snapshotsMaxDepth);
+        put(SNAPSHOTS_REPLACEOLD, snapshotsReplaceOld);
         put(CACHE_STRAGEGY, cacheStrategy.toString());
         put(COLLECTIONS, CommonPattern.SPACE.matcher(collections.trim()).replaceAll(""));
     }
@@ -575,8 +578,8 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
         return (r.equals(Boolean.TRUE.toString()));
     }
 
-    public int loadPreviewMaxdepth() {
-        final String r = get(LOADPREVIEWMAXDEPTH);
+    public int snapshotMaxdepth() {
+        final String r = get(SNAPSHOTS_MAXDEPTH);
         if (r == null) return -1;
         try {
             final int i = Integer.parseInt(r);
@@ -588,6 +591,12 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
         }
     }
 
+    public boolean snapshotReplaceold() {
+        final String r = get(SNAPSHOTS_REPLACEOLD);
+        if (r == null) return false;
+        return (r.equals(Boolean.TRUE.toString()));
+    }
+
     /**
      * get a recrawl date for a given age in minutes
      * @param oldTimeMinutes
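
CrawlProfile stores every setting as a string in the underlying map, so the new accessors above parse on read and fall back to a safe default on missing or malformed values. A small illustrative sketch of that pattern follows; class and method names here are hypothetical, not YaCy API.

import java.util.HashMap;
import java.util.Map;

public class ProfileProps {

    private final Map<String, String> props = new HashMap<>();

    public void put(String key, Object value) {
        props.put(key, String.valueOf(value));
    }

    public int getInt(String key, int dflt) {
        String r = props.get(key);
        if (r == null) return dflt;
        try {
            return Integer.parseInt(r);
        } catch (NumberFormatException e) {
            return dflt;   // mirror snapshotMaxdepth(): fall back on bad data
        }
    }

    public boolean getBool(String key) {
        return Boolean.TRUE.toString().equals(props.get(key));
    }

    public static void main(String[] args) {
        ProfileProps p = new ProfileProps();
        p.put("snapshotsMaxDepth", 2);
        p.put("snapshotsReplaceOld", true);
        System.out.println(p.getInt("snapshotsMaxDepth", -1));  // 2
        System.out.println(p.getInt("missingKey", -1));         // -1
        System.out.println(p.getBool("snapshotsReplaceOld"));   // true
    }
}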
diff --git a/source/net/yacy/crawler/data/Snapshots.java b/source/net/yacy/crawler/data/Snapshots.java
index 404176af1..fb9fe3f34 100644
--- a/source/net/yacy/crawler/data/Snapshots.java
+++ b/source/net/yacy/crawler/data/Snapshots.java
@@ -31,6 +31,8 @@
 import org.apache.solr.common.SolrDocument;
 
 import net.yacy.cora.date.GenericFormatter;
 import net.yacy.cora.document.encoding.ASCII;
 import net.yacy.cora.document.id.DigestURL;
+import net.yacy.cora.document.id.MultiProtocolURL;
+import net.yacy.cora.util.Html2Image;
 import net.yacy.search.index.Fulltext;
 import net.yacy.search.schema.CollectionSchema;
@@ -68,13 +70,15 @@
      * @param proxy - a string of the form 'http://<host>:<port>'
      * @return
      */
-    public File downloadPDFSnapshot(final DigestURL url, final int depth, final Date date, String proxy) {
+    public File downloadPDFSnapshot(final DigestURL url, final int depth, final Date date, boolean replaceOld, String proxy) {
+        Collection<File> oldPaths = findPaths(url, depth);
+        if (replaceOld) {
+            for (File oldPath: oldPaths) oldPath.delete();
+        }
         File path = definePath(url, "pdf", depth, date);
         path.getParentFile().mkdirs();
-
-        // STUB
-
-        return path;
+        boolean success = Html2Image.writeWkhtmltopdf(url.toNormalform(true), proxy, path);
+        return success ? path : null;
     }
 
     /**
@@ -122,9 +126,9 @@
      * @param ext
      * @return a set of files for snapshots of the url
      */
-    public Collection<File> findPaths(final DigestURL url, final String ext) {
+    public Collection<File> findPaths(final DigestURL url) {
         for (int i = 0; i < 100; i++) {
-            Collection<File> paths = findPaths(url, ext, i);
+            Collection<File> paths = findPaths(url, i);
             if (paths.size() > 0) return paths;
         }
         return new ArrayList<>(0);
     }
@@ -138,20 +142,23 @@
      * @param depth
      * @return a set of files for snapshots of the url
      */
-    public Collection<File> findPaths(final DigestURL url, final String ext, final int depth) {
+    public Collection<File> findPaths(final DigestURL url, final int depth) {
         String id = ASCII.String(url.hash());
         File pathToShard = pathToShard(url, depth);
-        String[] list = pathToShard.list();
+        String[] list = pathToShard.exists() && pathToShard.isDirectory() ? pathToShard.list() : null; // may be null if path does not exist
         ArrayList<File> paths = new ArrayList<>();
-        for (String f: list) {
-            if (f.startsWith(id) && f.endsWith(ext)) paths.add(new File(pathToShard, f));
+        if (list != null) {
+            final String ext = MultiProtocolURL.getFileExtension(url.getFileName());
+            for (String f: list) {
+                if (f.startsWith(id) && f.endsWith(ext)) paths.add(new File(pathToShard, f));
+            }
         }
         return paths;
     }
 
     private File pathToShard(final DigestURL url, final int depth) {
         String id = ASCII.String(url.hash());
-        File pathToHostDir = new File(storageLocation, url.getHost() + ":" + url.getPort());
+        File pathToHostDir = new File(storageLocation, url.getHost() + "." + url.getPort());
         File pathToDepthDir = new File(pathToHostDir, depth < 10 ? "0" + depth : Integer.toString(depth));
         File pathToShard = new File(pathToDepthDir, id.substring(0, 2));
         return pathToShard;
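
pathToShard above fixes the on-disk layout the commit message refers to: <host>.<port>/<two-digit depth>/<first two characters of the url hash>. An illustrative reconstruction follows; urlHash stands in for ASCII.String(url.hash()), and all values in main are made up.

import java.io.File;

public class SnapshotPath {

    public static File shardDir(File storage, String host, int port, int depth, String urlHash) {
        File hostDir = new File(storage, host + "." + port);
        File depthDir = new File(hostDir, depth < 10 ? "0" + depth : Integer.toString(depth));
        return new File(depthDir, urlHash.substring(0, 2));
    }

    public static void main(String[] args) {
        File dir = shardDir(new File("DATA/HTCACHE/SNAPSHOTS"), "example.org", 80, 2, "AbCdEfGh");
        System.out.println(dir);  // on Unix: DATA/HTCACHE/SNAPSHOTS/example.org.80/02/Ab
    }
}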
"could not generate snapshot for " + entry.url().toNormalform(true) : "wrote " + snapshotFile + " for " + entry.url().toNormalform(true))); } return doc; } diff --git a/source/net/yacy/data/ymark/YMarkCrawlStart.java b/source/net/yacy/data/ymark/YMarkCrawlStart.java index 6b0899a6d..103af2add 100644 --- a/source/net/yacy/data/ymark/YMarkCrawlStart.java +++ b/source/net/yacy/data/ymark/YMarkCrawlStart.java @@ -186,7 +186,7 @@ public class YMarkCrawlStart extends HashMap{ crawlingQ, true, true, true, false, true, true, false, - -1, + -1, true, CacheStrategy.IFFRESH, "robot_" + CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, ClientIdentification.yacyIntranetCrawlerAgentName); // TODO: make this a default profile in CrawlSwitchboard diff --git a/source/net/yacy/http/ProxyCacheHandler.java b/source/net/yacy/http/ProxyCacheHandler.java index 58f3efd03..392372937 100644 --- a/source/net/yacy/http/ProxyCacheHandler.java +++ b/source/net/yacy/http/ProxyCacheHandler.java @@ -70,7 +70,7 @@ public class ProxyCacheHandler extends AbstractRemoteHandler implements Handler final net.yacy.crawler.retrieval.Request yacyRequest = new net.yacy.crawler.retrieval.Request( null, url, - proxyHeaders.referer() == null ? null : new DigestURL(proxyHeaders.referer().toString()).hash(), + proxyHeaders.referer() == null ? null : new DigestURL(proxyHeaders.referer().toNormalform(true)).hash(), "", cachedResponseHeader.lastModified(), sb.crawler.defaultProxyProfile.handle(), diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 571b8221f..74bd8293d 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -346,7 +346,6 @@ public final class Switchboard extends serverSwitch { this.htDocsPath = getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT); this.log.config("HTDOCS Path: " + this.htDocsPath.toString()); - this.snapshots = new Snapshots(new File(this.htDocsPath, "SNAPSHOTS")); this.workPath = getDataPath(SwitchboardConstants.WORK_PATH, SwitchboardConstants.WORK_PATH_DEFAULT); this.workPath.mkdirs(); // if default work files exist, copy them (don't overwrite existing!) @@ -695,7 +694,8 @@ public final class Switchboard extends serverSwitch { final long maxCacheSize = 1024L * 1024L * Long.parseLong(getConfig(SwitchboardConstants.PROXY_CACHE_SIZE, "2")); // this is megabyte Cache.init(this.htCachePath, this.peers.mySeed().hash, maxCacheSize); - + this.snapshots = new Snapshots(new File(this.htCachePath, "SNAPSHOTS")); + // create the surrogates directories this.surrogatesInPath = getDataPath(