refactoring

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@8033 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent e914a30099
commit 775b44017e

@ -35,8 +35,6 @@ import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.HeaderFramework;
@ -47,6 +45,8 @@ import net.yacy.kelondro.blob.BEncodedHeap;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.io.ByteCount;
import org.apache.log4j.Logger;
public class RobotsTxt {
private static Logger log = Logger.getLogger(RobotsTxt.class);
@ -54,7 +54,7 @@ public class RobotsTxt {
protected static final String ROBOTS_DB_PATH_SEPARATOR = ";";
protected static final Pattern ROBOTS_DB_PATH_SEPARATOR_MATCHER = Pattern.compile(ROBOTS_DB_PATH_SEPARATOR);
BEncodedHeap robotsTable;
private final BEncodedHeap robotsTable;
private final ConcurrentHashMap<String, DomSync> syncObjects;
//private static final HashSet<String> loadedRobots = new HashSet<String>(); // only for debugging
@ -64,14 +64,14 @@ public class RobotsTxt {
public RobotsTxt(final BEncodedHeap robotsTable) {
this.robotsTable = robotsTable;
syncObjects = new ConcurrentHashMap<String, DomSync>();
this.syncObjects = new ConcurrentHashMap<String, DomSync>();
log.info("initiated robots table: " + robotsTable.getFile());
}
public void clear() {
log.info("clearing robots table");
this.robotsTable.clear();
syncObjects.clear();
this.syncObjects.clear();
}
public int size() {
@ -86,12 +86,12 @@ public class RobotsTxt {
private RobotsTxtEntry getEntry(final MultiProtocolURI theURL, final Set<String> thisAgents, final boolean fetchOnlineIfNotAvailableOrNotFresh) throws IOException {
// this method will always return a non-null value
String urlHostPort = getHostPort(theURL);
final String urlHostPort = getHostPort(theURL);
RobotsTxtEntry robotsTxt4Host = null;
Map<String, byte[]> record;
try {
record = this.robotsTable.get(this.robotsTable.encodedKey(urlHostPort));
} catch (RowSpaceExceededException e) {
} catch (final RowSpaceExceededException e) {
log.warn("memory exhausted", e);
record = null;
}
@ -119,7 +119,7 @@ public class RobotsTxt {
// to complete a download
try {
record = this.robotsTable.get(this.robotsTable.encodedKey(urlHostPort));
} catch (RowSpaceExceededException e) {
} catch (final RowSpaceExceededException e) {
log.warn("memory exhausted", e);
record = null;
}
@ -175,11 +175,11 @@ public class RobotsTxt {
}
// store the data into the robots DB
int sz = this.robotsTable.size();
final int sz = this.robotsTable.size();
addEntry(robotsTxt4Host);
if (this.robotsTable.size() <= sz) {
log.fatal("new entry in robots.txt table failed, resetting database");
this.clear();
clear();
addEntry(robotsTxt4Host);
}
} else {

@ -535,6 +535,7 @@ public final class FileUtils {
key = escaped_backslash.matcher(key).replaceAll("\\");
String value = escaped_newline.matcher(line.substring(pos + 1).trim()).replaceAll("\n");
value = value.replace("\\\\", "\\"); // does not work: escaped_backslashbackslash.matcher(value).replaceAll("\\");
//System.out.println("key = " + key + ", value = " + value);
props.put(key, value);
}
}

Loading…
Cancel
Save