performance update

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5653 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent d884c4718a
commit 54625360f7

@ -58,8 +58,6 @@ public class URLAnalysis {
* processes to analyse URL lists
*/
private static final long cleanuplimit = 50 * 1024 * 1024;
public static yacyURL poison = null;
static {
try {
@ -132,6 +130,7 @@ public class URLAnalysis {
boolean gz = urlfile.endsWith(".gz");
String analysis = (gz) ? urlfile.substring(0, urlfile.length() - 3) + ".stats.gz" : urlfile + ".stats";
long cleanuplimit = Math.max(50 * 1024 * 1024, MemoryControl.available() / 8);
// start threads
ArrayBlockingQueue<yacyURL> in = new ArrayBlockingQueue<yacyURL>(1000);

@ -48,6 +48,9 @@ public class yacyURL implements Serializable {
private static final long serialVersionUID = -1173233022912141884L;
public static final int TLD_any_zone_filter = 255; // TLD zones can be filtered during search; this is the catch-all filter
private static final Pattern backPathPattern = Pattern.compile("(/[^/]+(?<!/\\.{1,2})/)[.]{2}(?=/|$)|/\\.(?=/)|/(?=/)");
private static final Pattern patternDot = Pattern.compile("\\.");
private static final Pattern patternSlash = Pattern.compile("/");
private static final Pattern patternAmp = Pattern.compile("&");
// class variables
private String protocol, host, userInfo, path, quest, ref, hash;
@ -125,7 +128,7 @@ public class yacyURL implements Serializable {
// handle international domains
if (!Punycode.isBasic(host)) try {
final String[] domainParts = host.split("\\.");
final String[] domainParts = patternDot.split(host, 0);
StringBuilder buffer = new StringBuilder();
// encode each domain part separately
for(int i=0; i<domainParts.length; i++) {
@ -273,8 +276,8 @@ public class yacyURL implements Serializable {
}
private void escapePath() {
final String[] pathp = path.split("/", -1);
StringBuilder ptmp = new StringBuilder(pathp.length + 10);
final String[] pathp = patternSlash.split(path, 0);
StringBuilder ptmp = new StringBuilder(path.length() + 10);
for (int i = 0; i < pathp.length; i++) {
ptmp.append('/');
ptmp.append(escape(pathp[i]));
@ -287,8 +290,8 @@ public class yacyURL implements Serializable {
}
private void escapeQuest() {
final String[] questp = quest.split("&", -1);
StringBuilder qtmp = new StringBuilder(questp.length + 10);
final String[] questp = patternAmp.split(quest, 0);
StringBuilder qtmp = new StringBuilder(quest.length() + 10);
for (int i = 0; i < questp.length; i++) {
if (questp[i].indexOf('=') != -1) {
qtmp.append('&');
@ -362,10 +365,9 @@ public class yacyURL implements Serializable {
* @return The encoded string
*/
// from: http://www.w3.org/International/URLUTF8Encoder.java
public static StringBuilder escape(final String s)
{
final StringBuilder sbuf = new StringBuilder(s.length() + 10);
public static StringBuilder escape(final String s) {
final int len = s.length();
final StringBuilder sbuf = new StringBuilder(len + 10);
for (int i = 0; i < len; i++) {
final int ch = s.charAt(i);
if ('A' <= ch && ch <= 'Z') { // 'A'..'Z'
@ -704,7 +706,7 @@ public class yacyURL implements Serializable {
// find rootpath
int rootpathStart = 0;
int rootpathEnd = this.path.length() - 1;
if (this.path.startsWith("/"))
if (this.path.length() > 0 && this.path.charAt(0) == '/')
rootpathStart = 1;
if (this.path.endsWith("/"))
rootpathEnd = this.path.length() - 2;
@ -869,6 +871,8 @@ public class yacyURL implements Serializable {
public static void main(final String[] args) {
final String[][] test = new String[][]{
new String[]{null, "http://www.anomic.de"},
new String[]{null, "http://www.anomic.de/"},
new String[]{null, "http://www.anomic.de/home/test?x=1#home"},
new String[]{null, "http://www.anomic.de/home/test?x=1"},
new String[]{null, "http://www.anomic.de/home/test#home"},

Loading…
Cancel
Save