From 48aed15c48da8eafff51a902d0c759014c7c67d0 Mon Sep 17 00:00:00 2001 From: reger Date: Fri, 26 Sep 2014 23:49:10 +0200 Subject: [PATCH] skip loader wait cycle on concurrent access in nocache configuration. In nocache config resource is loaded online, leaving no benefit to wait for a faster cache hit. --- .../net/yacy/repository/LoaderDispatcher.java | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index b72fd8e1e..bfb145d9a 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -148,10 +148,22 @@ public final class LoaderDispatcher { return load(request, cacheStrategy, protocolMaxFileSize(request.url()), blacklistType, agent); } + /** + * loads a resource from cache or web/ftp/smb/file + * on concurrent execution waits max 5 sec for the prev. loader to fill the cache (except for CacheStrategy.NOCACHE) + * + * @param request the request essentials + * @param cacheStrategy strategy according to NOCACHE, IFFRESH, IFEXIST, CACHEONLY + * @param maxFileSize + * @param blacklistType + * @param agent + * @return the loaded entity in a Response object + * @throws IOException + */ public Response load(final Request request, final CacheStrategy cacheStrategy, final int maxFileSize, final BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException { - Semaphore check = this.loaderSteering.get(request.url()); - if (check != null) { - // a loading process may be going on for that url + Semaphore check = this.loaderSteering.get(request.url()); + if (check != null && cacheStrategy != CacheStrategy.NOCACHE) { + // a loading process is going on for that url //ConcurrentLog.info("LoaderDispatcher", "waiting for " + request.url().toNormalform(true)); long t = System.currentTimeMillis(); try { check.tryAcquire(5, TimeUnit.SECONDS);} catch (final 
InterruptedException e) {} @@ -163,15 +175,14 @@ public final class LoaderDispatcher { this.loaderSteering.put(request.url(), new Semaphore(0)); try { final Response response = loadInternal(request, cacheStrategy, maxFileSize, blacklistType, agent); - check = this.loaderSteering.remove(request.url()); - if (check != null) check.release(1000); + // finally block cleans up loaderSteering and semaphore return response; - } catch (final Throwable e) { + } catch (final Throwable e) { throw new IOException(e); } finally { // release the semaphore anyway - check = this.loaderSteering.remove(request.url()); - if (check != null) check.release(1000); + check = this.loaderSteering.remove(request.url()); // = next caller goes directly to loadInternal (is ok, we just wanted to fill the cache) + if (check != null) check.release(1000); // don't block any other } } @@ -190,8 +201,8 @@ public final class LoaderDispatcher { final String host = url.getHost(); final CrawlProfile crawlProfile = request.profileHandle() == null ? null : this.sb.crawler.get(UTF8.getBytes(request.profileHandle())); - // check if url is in blacklist - if (blacklistType != null && host != null && Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(), url.getFile())) { + // check if url is in blacklist + if (blacklistType != null && host != null && Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(), url.getFile())) { this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), crawlProfile, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1); throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.$"); }