enhanced Host Balancer strategy: fair round robin

pull/1/head
orbiter 11 years ago
parent 0c88a32c36
commit 2f63bd0261

@ -239,8 +239,7 @@ public class HostBalancer implements Balancer {
// refresh the round-robin cache // refresh the round-robin cache
this.roundRobinHostHashes.addAll(this.queues.keySet()); this.roundRobinHostHashes.addAll(this.queues.keySet());
// quickly get rid of small stacks to reduce number of files: // quickly get rid of small stacks to reduce number of files:
if (this.roundRobinHostHashes.size() > 100) { // remove all stacks with more than 10 entries
// if there are stacks with less than 10 entries, remove all stacks with more than 10 entries
// this shall kick out small stacks to prevent that too many files are opened for very wide crawls // this shall kick out small stacks to prevent that too many files are opened for very wide crawls
boolean smallStacksExist = false; boolean smallStacksExist = false;
boolean singletonStacksExist = false; boolean singletonStacksExist = false;
@ -252,24 +251,22 @@ public class HostBalancer implements Balancer {
if (size <= 10) {smallStacksExist = true; break smallsearch;} if (size <= 10) {smallStacksExist = true; break smallsearch;}
} }
} }
if (singletonStacksExist) { if (singletonStacksExist || smallStacksExist) {
Iterator<String> i = this.roundRobinHostHashes.iterator(); Iterator<String> i = this.roundRobinHostHashes.iterator();
while (i.hasNext()) { smallstacks: while (i.hasNext()) {
if (this.roundRobinHostHashes.size() <= 10) break smallstacks; // don't shrink the hosts until nothing is left
String s = i.next(); String s = i.next();
HostQueue hq = this.queues.get(s); HostQueue hq = this.queues.get(s);
if (hq == null) {i.remove(); continue;} if (hq == null) {i.remove(); continue smallstacks;}
int delta = Latency.waitingRemainingGuessed(hq.getHost(), s, robots, ClientIdentification.yacyInternetCrawlerAgent); int size = hq.size();
if (hq.size() != 1 && delta > 10) {i.remove();} if (singletonStacksExist) {
if (size != 1) {i.remove(); continue smallstacks;}
} else {
if (size > 10) {i.remove(); continue smallstacks;}
} }
} else if (smallStacksExist) { // to protect all small stacks which have a fast throughput, remove all with long waiting time
Iterator<String> i = this.roundRobinHostHashes.iterator();
while (i.hasNext()) {
String s = i.next();
HostQueue hq = this.queues.get(s);
if (hq == null) {i.remove(); continue;}
int delta = Latency.waitingRemainingGuessed(hq.getHost(), s, robots, ClientIdentification.yacyInternetCrawlerAgent); int delta = Latency.waitingRemainingGuessed(hq.getHost(), s, robots, ClientIdentification.yacyInternetCrawlerAgent);
if (hq.size() > 10 && delta > 10) {i.remove();} if (delta >= 1000) {i.remove();}
}
} }
} }
} }
@ -292,9 +289,7 @@ public class HostBalancer implements Balancer {
} }
if (rhq == null) { if (rhq == null) {
// second strategy: take from the largest stack and clean round robin cache // second strategy: take from the largest stack
// if we would not clear the round robin cache afterwards
// then all targets would be accessed equally which makes this strategy useless
int largest = Integer.MIN_VALUE; int largest = Integer.MIN_VALUE;
for (String h: this.roundRobinHostHashes) { for (String h: this.roundRobinHostHashes) {
HostQueue hq = this.queues.get(h); HostQueue hq = this.queues.get(h);
@ -306,13 +301,29 @@ public class HostBalancer implements Balancer {
} }
} }
} }
this.roundRobinHostHashes.clear(); // start from the beginning next time
rhq = this.queues.get(rhh); rhq = this.queues.get(rhh);
} }
} }
if (rhq == null) continue tryagain; if (rhq == null) continue tryagain;
long timestamp = System.currentTimeMillis();
Request request = rhq.pop(delay, cs, robots); // this pop is outside of synchronization to prevent blocking of pushes Request request = rhq.pop(delay, cs, robots); // this pop is outside of synchronization to prevent blocking of pushes
long actualwaiting = System.currentTimeMillis() - timestamp;
if (actualwaiting > 1000) {
synchronized (this) {
// to prevent that this occurs again, remove all stacks with positive delay times (which may be less after that waiting)
Iterator<String> i = this.roundRobinHostHashes.iterator();
protectcheck: while (i.hasNext()) {
if (this.roundRobinHostHashes.size() <= 3) break protectcheck; // don't shrink the hosts until nothing is left
String s = i.next();
HostQueue hq = this.queues.get(s);
if (hq == null) {i.remove(); continue protectcheck;}
int delta = Latency.waitingRemainingGuessed(hq.getHost(), s, robots, ClientIdentification.yacyInternetCrawlerAgent);
if (delta >= 0) {i.remove();}
}
}
}
int size = rhq.size(); int size = rhq.size();
if (size == 0) { if (size == 0) {

Loading…
Cancel
Save