*) cleaner code

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7331 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
low012 15 years ago
parent 38fdf43587
commit eb79b952ef

@ -54,6 +54,7 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistError;
public class BlacklistCleaner_p {
@ -109,7 +110,7 @@ public class BlacklistCleaner_p {
}
// list illegal entries
final Map<String, Integer> illegalEntries = getIllegalEntries(blacklistToUse, Switchboard.urlBlacklist, allowRegex);
final Map<String, BlacklistError> illegalEntries = getIllegalEntries(blacklistToUse, Switchboard.urlBlacklist, allowRegex);
prop.put(RESULTS + "blList", blacklistToUse);
prop.put(RESULTS + "entries", illegalEntries.size());
prop.putHTML(RESULTS + "blEngine", Switchboard.urlBlacklist.getEngineInfo());
@ -118,9 +119,9 @@ public class BlacklistCleaner_p {
prop.put(RESULTS + DISABLED + "entries", illegalEntries.size());
int i = 0;
String key;
for (Entry<String, Integer> entry : illegalEntries.entrySet()) {
for (final Entry<String, BlacklistError> entry : illegalEntries.entrySet()) {
key = entry.getKey();
prop.put(RESULTS + DISABLED + ENTRIES + i + "_error", entry.getValue().longValue());
prop.put(RESULTS + DISABLED + ENTRIES + i + "_error", entry.getValue().getLong());
prop.putHTML(RESULTS + DISABLED + ENTRIES + i + "_entry", key);
i++;
}
@ -238,29 +239,29 @@ public class BlacklistCleaner_p {
* illegal by the blacklistEngine with the entry as key and an error code as
* value.
*/
private static Map<String, Integer> getIllegalEntries(final String blacklistToUse, final Blacklist blEngine, final boolean allowRegex) {
final Map<String, Integer> illegalEntries = new HashMap<String, Integer>();
private static Map<String, BlacklistError> getIllegalEntries(final String blacklistToUse, final Blacklist blEngine, final boolean allowRegex) {
final Map<String, BlacklistError> illegalEntries = new HashMap<String, BlacklistError>();
final Set<String> legalEntries = new HashSet<String>();
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));
final Map<String, String> properties= new HashMap<String, String>();
properties.put("allowRegex", String.valueOf(allowRegex));
int err = 0;
BlacklistError err = BlacklistError.NO_ERROR;
for (String element : list) {
element = element.trim();
// check for duplicate occurrence
if (legalEntries.contains(element)) {
illegalEntries.put(element, Integer.valueOf(Blacklist.ERR_DOUBLE_OCCURANCE));
illegalEntries.put(element, BlacklistError.DOUBLE_OCCURANCE);
continue;
}
legalEntries.add(element);
err = blEngine.checkError(element, properties);
if (err > 0) {
if (err.getInt() > 0) {
illegalEntries.put(element, err);
}
}
@ -277,14 +278,13 @@ public class BlacklistCleaner_p {
*/
private static int removeEntries(final String blacklistToUse, final String[] supportedBlacklistTypes, final String[] entries) {
// load blacklist data from file
final ArrayList<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));
boolean listChanged = false;
// delete the old entry from file
String s;
for (int i=0; i<entries.length; i++) {
s = entries[i];
for (final String entry : entries) {
String s = entry;
if (list != null){
@ -301,14 +301,13 @@ public class BlacklistCleaner_p {
}
// remove the entry from the running blacklist engine
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
if (ListManager.listSetContains(supportedBlacklistTypes[blTypes] + ".BlackLists", blacklistToUse)) {
for (final String supportedBlacklistType : supportedBlacklistTypes) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) {
final String host = (s.indexOf('/') == -1) ? s : s.substring(0, s.indexOf('/'));
final String path = (s.indexOf('/') == -1) ? ".*" : s.substring(s.indexOf('/') + 1);
try {
Switchboard.urlBlacklist.remove(supportedBlacklistTypes[blTypes], host, path);
Switchboard.urlBlacklist.remove(supportedBlacklistType, host, path);
} catch (final RuntimeException e) {
//System.err.println(e.getMessage() + ": " + host + "/" + path);
Log.logSevere("BLACKLIST-CLEANER", e.getMessage() + ": " + host + "/" + path);
}
}
@ -339,20 +338,20 @@ public class BlacklistCleaner_p {
try {
pw = new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklistToUse), true));
String host, path;
for (int i=0, pos; i<newEntry.length; i++) {
pos = newEntry[i].indexOf('/');
for (final String n : newEntry) {
int pos = n.indexOf('/');
if (pos < 0) {
host = newEntry[i];
host = n;
path = ".*";
} else {
host = newEntry[i].substring(0, pos);
path = newEntry[i].substring(pos + 1);
host = n.substring(0, pos);
path = n.substring(pos + 1);
}
pw.println(host + "/" + path);
for (int blTypes = 0; blTypes < supportedBlacklistTypes.length; blTypes++) {
if (ListManager.listSetContains(supportedBlacklistTypes[blTypes] + ".BlackLists",blacklistToUse)) {
for (final String s : supportedBlacklistTypes) {
if (ListManager.listSetContains(s + ".BlackLists",blacklistToUse)) {
Switchboard.urlBlacklist.add(
supportedBlacklistTypes[blTypes],
s,
host,
path);
}

@ -1,9 +1,8 @@
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.order.Digest;
@ -20,16 +19,16 @@ public class get {
final Switchboard switchboard = (Switchboard) env;
final boolean isAdmin=switchboard.verifyAuthentication(header, true);
final serverObjects prop = new serverObjects();
String tag=null;
String date;
String tag = null;
final String date;
//String url=""; //urlfilter not yet implemented
if(post != null && post.containsKey("tag")){
if (post != null && post.containsKey("tag")) {
tag=post.get("tag");
}
if(post != null && post.containsKey("date")){
if (post != null && post.containsKey("date")) {
date=post.get("date");
}else{
} else {
date=DateFormatter.formatISO8601(new Date(System.currentTimeMillis()));
}
@ -40,16 +39,15 @@ public class get {
Date parsedDate = null;
try {
parsedDate = DateFormatter.parseISO8601(date);
} catch (final ParseException e) {
parsedDate = new Date();
}
parsedDate = DateFormatter.parseISO8601(date);
} catch (final ParseException e) {
parsedDate = new Date();
}
final ArrayList<String> bookmark_hashes=switchboard.bookmarksDB.getDate(Long.toString(parsedDate.getTime())).getBookmarkList();
final Iterator<String> it=bookmark_hashes.iterator();
BookmarksDB.Bookmark bookmark=null;
while(it.hasNext()){
bookmark=switchboard.bookmarksDB.getBookmark(it.next());
final List<String> bookmark_hashes = switchboard.bookmarksDB.getDate(Long.toString(parsedDate.getTime())).getBookmarkList();
BookmarksDB.Bookmark bookmark = null;
for (final String bookmark_hash : bookmark_hashes){
bookmark=switchboard.bookmarksDB.getBookmark(bookmark_hash);
if(DateFormatter.formatISO8601(new Date(bookmark.getTimeStamp())).equals(date) &&
tag==null || bookmark.getTags().contains(tag) &&
isAdmin || bookmark.getPublic()){

@ -1,10 +1,14 @@
// blogBoard.java
// BlogBoard.java
// -------------------------------------
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
// last major change: 20.07.2004
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -170,7 +174,7 @@ public class BlogBoard {
}
private boolean parseXMLimport(final Document doc) {
if(!doc.getDocumentElement().getTagName().equals("blog")) {
if(!"blog".equals(doc.getDocumentElement().getTagName())) {
return false;
}
@ -183,23 +187,23 @@ public class BlogBoard {
String key = null, ip = null, StrSubject = null, StrAuthor = null, StrPage = null, StrDate = null;
Date date = null;
if(!items.item(i).getNodeName().equals("item")) continue;
if(!"item".equals(items.item(i).getNodeName())) continue;
final NodeList currentNodeChildren = items.item(i).getChildNodes();
for (int j = 0, m = currentNodeChildren.getLength(); j < m; ++j) {
final Node currentNode = currentNodeChildren.item(j);
if (currentNode.getNodeName().equals("id")) {
if ("id".equals(currentNode.getNodeName())) {
key = currentNode.getFirstChild().getNodeValue();
} else if (currentNode.getNodeName().equals("ip")) {
} else if ("ip".equals(currentNode.getNodeName())) {
ip = currentNode.getFirstChild().getNodeValue();
} else if (currentNode.getNodeName().equals("timestamp")) {
} else if ("timestamp".equals(currentNode.getNodeName())) {
StrDate = currentNode.getFirstChild().getNodeValue();
} else if (currentNode.getNodeName().equals("subject")) {
} else if ("subject".equals(currentNode.getNodeName())) {
StrSubject = currentNode.getFirstChild().getNodeValue();
} else if (currentNode.getNodeName().equals("author")) {
} else if ("author".equals(currentNode.getNodeName())) {
StrAuthor = currentNode.getFirstChild().getNodeValue();
} else if (currentNode.getNodeName().equals("content")) {
} else if ("content".equals(currentNode.getNodeName())) {
StrPage = currentNode.getFirstChild().getNodeValue();
}
}
@ -371,7 +375,7 @@ public class BlogBoard {
if (this.record.get("comments") == null) {
this.record.put("comments", ListManager.collection2string(new ArrayList<String>()));
}
if (this.record.get("commentMode") == null || this.record.get("commentMode").equals("")) {
if (this.record.get("commentMode") == null || this.record.get("commentMode").length() < 1) {
this.record.put("commentMode", "2");
}
}

@ -1,10 +1,14 @@
// wikiBoard.java
// BlogBoardComments.java
// -------------------------------------
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
// last major change: 20.07.2004
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -158,7 +162,7 @@ public class BlogBoardComments {
}
private boolean parseXMLimport(final Document doc) {
if(!doc.getDocumentElement().getTagName().equals("blog")) {
if(!"blog".equals(doc.getDocumentElement().getTagName())) {
return false;
}
@ -171,7 +175,7 @@ public class BlogBoardComments {
String key = null, ip = null, StrSubject = null, StrAuthor = null, StrPage = null, StrDate = null;
Date date = null;
if(!items.item(i).getNodeName().equals("item"))
if(!"item".equals(items.item(i).getNodeName()))
continue;
final NodeList currentNodeChildren = items.item(i).getChildNodes();
@ -179,17 +183,17 @@ public class BlogBoardComments {
for(int j=0, m = currentNodeChildren.getLength(); j < m; ++j) {
final Node currentNode = currentNodeChildren.item(j);
if (currentNode.getNodeName().equals("id")) {
if ("id".equals(currentNode.getNodeName())) {
key = currentNode.getFirstChild().getNodeValue();
} else if(currentNode.getNodeName().equals("ip")) {
} else if("ip".equals(currentNode.getNodeName())) {
ip = currentNode.getFirstChild().getNodeValue();
} else if(currentNode.getNodeName().equals("timestamp")) {
} else if("timestamp".equals(currentNode.getNodeName())) {
StrDate = currentNode.getFirstChild().getNodeValue();
} else if(currentNode.getNodeName().equals("subject")) {
} else if("subject".equals(currentNode.getNodeName())) {
StrSubject = currentNode.getFirstChild().getNodeValue();
} else if(currentNode.getNodeName().equals("author")) {
} else if("author".equals(currentNode.getNodeName())) {
StrAuthor = currentNode.getFirstChild().getNodeValue();
} else if(currentNode.getNodeName().equals("content")) {
} else if("content".equals(currentNode.getNodeName())) {
StrPage = currentNode.getFirstChild().getNodeValue();
}
}
@ -363,7 +367,7 @@ public class BlogBoardComments {
* @return
*/
public boolean isAllowed() {
    // Literal-first equals is null-safe: a missing "moderated" value yields false.
    return "true".equals(record.get("moderated"));
}
public void allow() {
record.put("moderated", "true");

@ -1,10 +1,14 @@
// BookmarkHelper.java
// BookmarkDate.java
// -------------------------------------
// part of YACY
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// Methods from this file were originally contributed by Alexander Schier
// and were refactored by Michael Christen for a better method structure on 30.01.2010
//
@ -30,6 +34,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import de.anomic.data.BookmarksDB.Bookmark;
@ -43,7 +48,7 @@ public class BookmarkDate {
MapHeap datesTable;
/**
 * Creates the dates table as a {@code MapHeap} stored in the given file.
 *
 * @param datesFile file handed to the {@code MapHeap} constructor
 * @throws IOException declared here; presumably propagated from the {@code MapHeap} constructor
 */
public BookmarkDate(final File datesFile) throws IOException {
    this.datesTable = new MapHeap(datesFile, 20, NaturalOrder.naturalOrder, 1024 * 64, 500, '_');
}
@ -68,18 +73,16 @@ public class BookmarkDate {
// rebuilds the datesDB from the bookmarksDB
public void init(final Iterator<Bookmark> it) {
Log.logInfo("BOOKMARKS", "start init dates.db from bookmarks.db...");
//final Iterator<Bookmark> it=bookmarkIterator(true);
Bookmark bookmark;
String date;
Entry bmDate;
int count = 0;
while (it.hasNext()) {
bookmark=it.next();
// if (bookmark == null) continue;
bookmark = it.next();
date = String.valueOf(bookmark.getTimeStamp());
bmDate=getDate(date);
if(bmDate==null){
bmDate=new Entry(date);
if (bmDate == null) {
bmDate = new Entry(date);
}
bmDate.add(bookmark.getUrlHash());
bmDate.setDatesTable();
@ -98,49 +101,46 @@ public class BookmarkDate {
/**
 * Creates an entry for the given timestamp with an empty URL-hash list.
 *
 * @param mydate timestamp in milliseconds as a decimal string
 * @throws NumberFormatException if {@code mydate} is not a parsable long
 */
public Entry(final String mydate){
    //round to seconds, but store as milliseconds (java timestamp)
    date = String.valueOf((Long.parseLong(mydate)/1000)*1000);
    mem = new HashMap<String, String>();
    mem.put(URL_HASHES, "");
}
/**
 * Creates an entry for the given timestamp that uses the supplied map as
 * its backing store (the map is kept by reference, not copied).
 *
 * @param mydate timestamp in milliseconds as a decimal string
 * @param map    backing map for this entry
 * @throws NumberFormatException if {@code mydate} is not a parsable long
 */
public Entry(final String mydate, final Map<String, String> map){
    //round to seconds, but store as milliseconds (java timestamp)
    date = String.valueOf((Long.parseLong(mydate)/1000)*1000);
    mem = map;
}
/**
 * Creates an entry for the given timestamp whose URL-hash list is
 * initialised from {@code entries}.
 *
 * @param mydate  timestamp in milliseconds as a decimal string
 * @param entries URL hashes, serialised via {@code ListManager.collection2string}
 * @throws NumberFormatException if {@code mydate} is not a parsable long
 */
public Entry(final String mydate, final List<String> entries){
    //round to seconds, but store as milliseconds (java timestamp)
    date = String.valueOf((Long.parseLong(mydate)/1000)*1000);
    mem = new HashMap<String, String>();
    mem.put(URL_HASHES, ListManager.collection2string(entries));
}
public void add(final String urlHash){
final String urlHashes = mem.get(URL_HASHES);
ArrayList<String> list;
if(urlHashes != null && !urlHashes.equals("")){
list=ListManager.string2arraylist(urlHashes);
List<String> list;
if(urlHashes != null && !"".equals(urlHashes)){
list = ListManager.string2arraylist(urlHashes);
}else{
list=new ArrayList<String>();
list = new ArrayList<String>();
}
if(!list.contains(urlHash) && urlHash != null && !urlHash.equals("")){
if(!list.contains(urlHash) && urlHash != null && !"".equals(urlHashes)){
list.add(urlHash);
}
this.mem.put(URL_HASHES, ListManager.collection2string(list));
/*if(urlHashes!=null && !urlHashes.equals("") ){
if(urlHashes.indexOf(urlHash) <0){
this.mem.put(URL_HASHES, urlHashes+","+urlHash);
}
}else{
this.mem.put(URL_HASHES, urlHash);
}*/
}
/**
 * Removes a URL hash from this entry's stored hash list and writes the
 * list back; does nothing to the list if the hash is not present.
 *
 * @param urlHash the URL hash to remove
 */
public void delete(final String urlHash){
    final List<String> list = ListManager.string2arraylist(this.mem.get(URL_HASHES));
    // List.remove(Object) is a no-op for absent elements, so no contains() check is needed.
    list.remove(urlHash);
    this.mem.put(URL_HASHES, ListManager.collection2string(list));
}
public void setDatesTable() {
if (this.size() >0) {
try {
@ -156,12 +156,15 @@ public class BookmarkDate {
}
}
}
/**
 * @return the date string held by this entry
 */
public String getDateString(){
    return this.date;
}
public ArrayList<String> getBookmarkList(){
public List<String> getBookmarkList(){
return ListManager.string2arraylist(this.mem.get(URL_HASHES));
}
/**
 * @return the number of URL hashes stored in this entry
 */
public int size(){
    return getBookmarkList().size();
}

@ -143,11 +143,11 @@ public class BookmarkHelper {
writer.close();
links = scraper.getAnchors();
} catch (final IOException e) { Log.logWarning("BOOKMARKS", "error during load of links: "+ e.getClass() +" "+ e.getMessage());}
for (Entry<MultiProtocolURI, String> link: links.entrySet()) {
for (final Entry<MultiProtocolURI, String> link: links.entrySet()) {
url = link.getKey();
title = link.getValue();
Log.logInfo("BOOKMARKS", "links.get(url)");
if (title.equals("")) {//cannot be displayed
if ("".equals(title)) {//cannot be displayed
title = url.toString();
}
bm = db.new Bookmark(url.toString());
@ -176,13 +176,12 @@ public class BookmarkHelper {
}
private static int importFromXML(BookmarksDB db, final InputStream input, final boolean importPublic){
final DocumentBuilderFactory factory=DocumentBuilderFactory.newInstance();
final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setValidating(false);
factory.setNamespaceAware(false);
DocumentBuilder builder;
try {
builder = factory.newDocumentBuilder();
final Document doc=builder.parse(input);
final DocumentBuilder builder = factory.newDocumentBuilder();
final Document doc = builder.parse(input);
return parseXMLimport(db, doc, importPublic);
} catch (final ParserConfigurationException e) {
} catch (final SAXException e) {
@ -194,35 +193,35 @@ public class BookmarkHelper {
private static int parseXMLimport(BookmarksDB db, final Node doc, final boolean importPublic){
int importCount = 0;
if (doc.getNodeName().equals("post")) {
if ("post".equals(doc.getNodeName())) {
final NamedNodeMap attributes = doc.getAttributes();
final String url=attributes.getNamedItem("href").getNodeValue();
if(url.equals("")){
final String url = attributes.getNamedItem("href").getNodeValue();
if("".equals(url)){
return 0;
}
final Bookmark bm=db.new Bookmark(url);
String tagsString="";
String title="";
String description="";
String time="";
if(attributes.getNamedItem("tag")!=null){
tagsString=attributes.getNamedItem("tag").getNodeValue();
final Bookmark bm = db.new Bookmark(url);
String tagsString = "";
String title = "";
String description = "";
String time = "";
if(attributes.getNamedItem("tag") != null){
tagsString = attributes.getNamedItem("tag").getNodeValue();
}
if(attributes.getNamedItem("description")!=null){
title=attributes.getNamedItem("description").getNodeValue();
if(attributes.getNamedItem("description") != null){
title = attributes.getNamedItem("description").getNodeValue();
}
if(attributes.getNamedItem("extended")!=null){
description=attributes.getNamedItem("extended").getNodeValue();
if(attributes.getNamedItem("extended") != null){
description = attributes.getNamedItem("extended").getNodeValue();
}
if(attributes.getNamedItem("time")!=null){
time=attributes.getNamedItem("time").getNodeValue();
if(attributes.getNamedItem("time") != null){
time = attributes.getNamedItem("time").getNodeValue();
}
Set<String> tags=new HashSet<String>();
Set<String> tags = new HashSet<String>();
if(title != null){
if (title != null) {
bm.setProperty(Bookmark.BOOKMARK_TITLE, title);
}
if(tagsString!=null){
if (tagsString != null) {
tags = ListManager.string2set(tagsString.replace(' ', ','));
}
bm.setTags(tags, true);
@ -262,7 +261,7 @@ public class BookmarkHelper {
while (tagIterator.hasNext()) {
tag=tagIterator.next();
if (tag.getFriendlyName().startsWith((root.equals("/") ? root : root+"/"))) {
if (tag.getFriendlyName().startsWith(("/".equals(root) ? root : root+"/"))) {
path = tag.getFriendlyName();
path = BookmarkHelper.cleanTagsString(path);
while(path.length() > 0 && !path.equals(root)){
@ -271,7 +270,7 @@ public class BookmarkHelper {
}
}
}
if (!root.equals("/")) { folders.add(root); }
if (!"/".equals(root)) { folders.add(root); }
folders.add("\uffff");
return folders.iterator();
}

@ -338,7 +338,7 @@ public class BookmarksDB {
tagSet = bookmark.getTags();
tagSet.remove(oldName);
bookmark.setTags(tagSet, true); // might not be needed, but doesn't hurt
if(!newName.equals("")) bookmark.addTag(newName);
if(!"".equals(newName)) bookmark.addTag(newName);
saveBookmark(bookmark);
}
return true;
@ -589,14 +589,14 @@ public class BookmarksDB {
/**
 * @return {@code true} only if the {@code BOOKMARK_PUBLIC} property is
 *         present and equals {@code "public"}
 */
public boolean getPublic(){
    if (entry.containsKey(BOOKMARK_PUBLIC)) {
        // literal-first comparison stays safe even if the stored value is null
        return "public".equals(entry.get(BOOKMARK_PUBLIC));
    }
    return false;
}
/**
 * @return {@code true} only if the {@code BOOKMARK_IS_FEED} property is
 *         present and equals {@code "true"}
 */
public boolean getFeed(){
    if (entry.containsKey(BOOKMARK_IS_FEED)) {
        // literal-first comparison stays safe even if the stored value is null
        return "true".equals(entry.get(BOOKMARK_IS_FEED));
    }
    return false;
}

@ -70,7 +70,7 @@ public class DidYouMean {
* @param index a termIndex - most likely retrieved from a switchboard object.
* @param sort true/false - sorts the resulting TreeSet by index.count(); <b>Warning:</b> this causes heavy i/o.
*/
public DidYouMean(final IndexCell<WordReference> index, String word0) {
public DidYouMean(final IndexCell<WordReference> index, final String word0) {
this.resultSet = Collections.synchronizedSortedSet(new TreeSet<String>(new headMatchingComparator(word0, WORD_LENGTH_COMPARATOR)));
this.word = word0.toLowerCase();
this.wordLen = word.length();
@ -103,8 +103,8 @@ public class DidYouMean {
}
}
private static final boolean isAlphabet(char[] alpha, char testchar) {
for (char a: alpha) if (a == testchar) return true;
private static final boolean isAlphabet(final char[] alpha, final char testchar) {
for (final char a: alpha) if (a == testchar) return true;
return false;
}
@ -122,28 +122,28 @@ public class DidYouMean {
* @param preSortSelection the number of words that participate in the IO-intensive sort
* @return
*/
public SortedSet<String> getSuggestions(long timeout, int preSortSelection) {
public SortedSet<String> getSuggestions(final long timeout, final int preSortSelection) {
if (this.word.length() < MinimumInputWordLength) return this.resultSet; // return nothing if input is too short
long startTime = System.currentTimeMillis();
long timelimit = startTime + timeout;
final long startTime = System.currentTimeMillis();
final long timelimit = startTime + timeout;
if (this.word.indexOf(' ') > 0) return getSuggestions(this.word.split(" "), timeout, preSortSelection, this.index);
SortedSet<String> preSorted = getSuggestions(timeout);
final SortedSet<String> preSorted = getSuggestions(timeout);
if (System.currentTimeMillis() > timelimit) {
Log.logInfo("DidYouMean", "found and returned " + preSorted.size() + " unsorted suggestions (1); execution time: "
+ (System.currentTimeMillis() - startTime) + "ms");
return preSorted;
}
DynamicScore<String> scored = new ScoreCluster<String>();
final DynamicScore<String> scored = new ScoreCluster<String>();
for (final String s: preSorted) {
if (System.currentTimeMillis() > timelimit) break;
if (scored.size() >= 2 * preSortSelection) break;
scored.inc(s, index.count(Word.word2hash(s)));
}
SortedSet<String> countSorted = Collections.synchronizedSortedSet(new TreeSet<String>(new headMatchingComparator(this.word, this.INDEX_SIZE_COMPARATOR)));
int wc = index.count(Word.word2hash(this.word)); // all counts must be greater than this
final SortedSet<String> countSorted = Collections.synchronizedSortedSet(new TreeSet<String>(new headMatchingComparator(this.word, this.INDEX_SIZE_COMPARATOR)));
final int wc = index.count(Word.word2hash(this.word)); // all counts must be greater than this
while (scored.size() > 0 && countSorted.size() < preSortSelection) {
String s = scored.getMaxKey();
final String s = scored.getMaxKey();
int score = scored.delete(s);
if (s.length() >= MinimumOutputWordLength && score > wc) countSorted.add(s);
if (System.currentTimeMillis() > timelimit) break;
@ -169,7 +169,7 @@ public class DidYouMean {
* @return
*/
@SuppressWarnings("unchecked")
private static SortedSet<String> getSuggestions(final String[] words, long timeout, int preSortSelection, final IndexCell<WordReference> index) {
private static SortedSet<String> getSuggestions(final String[] words, final long timeout, final int preSortSelection, final IndexCell<WordReference> index) {
final SortedSet<String>[] s = new SortedSet[words.length];
for (int i = 0; i < words.length; i++) {
s[i] = new DidYouMean(index, words[i]).getSuggestions(timeout / words.length, preSortSelection);
@ -195,14 +195,14 @@ public class DidYouMean {
* @param timeout execution time in ms.
* @return a Set&lt;String&gt; with word variations contained in term index.
*/
private SortedSet<String> getSuggestions(long timeout) {
private SortedSet<String> getSuggestions(final long timeout) {
long startTime = System.currentTimeMillis();
this.timeLimit = startTime + timeout;
// create one consumer thread that checks the guessLib queue
// for occurrences in the index. If the producers are started next, their
// results can be consumers directly
Consumer[] consumers = new Consumer[AVAILABLE_CPU];
final Consumer[] consumers = new Consumer[AVAILABLE_CPU];
consumers[0] = new Consumer();
consumers[0].start();
@ -219,7 +219,7 @@ public class DidYouMean {
// the CPU load to create the guessed words is very low, but the testing
// against the library may be CPU intensive. Since it is possible to test
// words in the library concurrently, it is a good idea to start separate threads
Thread[] producers = new Thread[4];
final Thread[] producers = new Thread[4];
producers[0] = new ChangingOneLetter();
producers[1] = new AddingOneLetter();
producers[2] = new DeletingOneLetter();
@ -261,7 +261,7 @@ public class DidYouMean {
}
private void test(final String s) throws InterruptedException {
Set<String> libr = LibraryProvider.dymLib.recommend(s);
final Set<String> libr = LibraryProvider.dymLib.recommend(s);
libr.addAll(LibraryProvider.geoLoc.recommend(s));
if (!libr.isEmpty()) createGen = false;
for (final String t: libr) {
@ -398,7 +398,7 @@ public class DidYouMean {
private static class headMatchingComparator implements Comparator<String> {
private final String head;
private final Comparator<String> secondaryComparator;
/**
 * @param head                stored lower-cased (default locale)
 * @param secondaryComparator comparator kept for use alongside {@code head}
 */
public headMatchingComparator(final String head, final Comparator<String> secondaryComparator) {
    this.head = head.toLowerCase();
    this.secondaryComparator = secondaryComparator;
}

@ -70,7 +70,7 @@ public class WikiCode extends AbstractWikiParser implements WikiParser {
Tags(final String openWiki, final String closeWiki, final String openHTML, final String closeHTML) {
if (openHTML == null || closeHTML == null || openWiki == null || closeWiki == null) {
throw new IllegalArgumentException("");
throw new IllegalArgumentException("Parameter may not be null.");
}
this.openHTML = openHTML;

@ -155,7 +155,7 @@ public class Browser {
}
public static void main(final String[] args) {
if (args[0].equals("-u")) {
if ("-u".equals(args[0])) {
openBrowser(args[1]);
}
}

@ -127,7 +127,7 @@ public abstract class AbstractWriter extends AbstractReader implements Writer {
int pos;
while ((line = br.readLine()) != null) { // very slow readLine????
line = line.trim();
if (line.equals("# EOF")) return map;
if ("# EOF".equals(line)) return map;
if ((line.length() == 0) || (line.charAt(0) == '#')) continue;
pos = line.indexOf('=');
if (pos < 0) continue;

@ -74,8 +74,8 @@ public class Base64Order extends AbstractOrder<byte[]> implements ByteOrder, Com
byte acc, bcc;
byte c;
// pre-compute comparison results: this omits one single ahpla lookup during comparison
for (char ac: alpha) {
for (char bc: alpha) {
for (final char ac: alpha) {
for (final char bc: alpha) {
acc = ahpla[ac];
bcc = ahpla[bc];
c = 0;
@ -92,9 +92,11 @@ public class Base64Order extends AbstractOrder<byte[]> implements ByteOrder, Com
return new HandleSet(keylength, this, space);
}
public static byte[] zero(int length) {
public static byte[] zero(final int length) {
final byte[] z = new byte[length];
while (length > 0) { length--; z[length] = (byte) alpha_standard[0]; }
for (byte b : z) {
b = (byte) alpha_standard[0];
}
return z;
}
@ -119,10 +121,10 @@ public class Base64Order extends AbstractOrder<byte[]> implements ByteOrder, Com
}
/**
 * Returns a new {@code Base64Order} for a two-letter signature:
 * {@code "Bd"} -> (false, false), {@code "bd"} -> (false, true),
 * {@code "Bu"} -> (true, false), {@code "bu"} -> (true, true).
 *
 * @param signature the signature to look up; may be {@code null}
 * @return the matching order, or {@code null} for any other signature
 */
public final static ByteOrder bySignature(final String signature) {
    // literal-first comparisons keep a null signature from throwing
    if ("Bd".equals(signature)) return new Base64Order(false, false);
    if ("bd".equals(signature)) return new Base64Order(false, true);
    if ("Bu".equals(signature)) return new Base64Order(true, false);
    if ("bu".equals(signature)) return new Base64Order(true, true);
    return null;
}
@ -533,27 +535,27 @@ public class Base64Order extends AbstractOrder<byte[]> implements ByteOrder, Com
System.out.println("usage: -[ec|dc|es|ds|clcn] <arg>");
System.exit(0);
}
if (s[0].equals("-ec")) {
if ("-ec".equals(s[0])) {
// generate a b64 encoding from a given cardinal
System.out.println(b64.encodeLong(Long.parseLong(s[1]), 4));
}
if (s[0].equals("-dc")) {
if ("-dc".equals(s[0])) {
// generate a b64 decoding from a given cardinal
System.out.println(b64.decodeLong(s[1]));
}
if (s[0].equals("-es")) {
if ("-es".equals(s[0])) {
// generate a b64 encoding from a given string
System.out.println(b64.encodeString(s[1]));
}
if (s[0].equals("-ds")) {
if ("-ds".equals(s[0])) {
// generate a b64 decoding from a given string
System.out.println(b64.decodeString(s[1]));
}
if (s[0].equals("-cl")) {
if ("-cl".equals(s[0])) {
// return the cardinal of a given string as long value with the enhanced encoder
System.out.println(Base64Order.enhancedCoder.cardinal(s[1].getBytes()));
}
if (s[0].equals("-cn")) {
if ("-cn".equals(s[0])) {
// return the cardinal of a given string as normalized float 0 .. 1 with the enhanced encoder
System.out.println(((double) Base64Order.enhancedCoder.cardinal(s[1].getBytes())) / ((double) Long.MAX_VALUE));
}

@ -68,7 +68,7 @@ public class AttrSeq {
this.structure = null;
this.created = -1;
this.name = "";
this.entries = (tree) ? (Map<String, Object>) new TreeMap<String, Object>() : (Map<String, Object>) new HashMap<String, Object>();
this.entries = (tree) ? new TreeMap<String, Object>() : new HashMap<String, Object>();
readAttrFile(file);
}
@ -77,7 +77,7 @@ public class AttrSeq {
this.structure = new Structure(struct);
this.created = System.currentTimeMillis();
this.name = name;
this.entries = (tree) ? (Map<String, Object>) new TreeMap<String, Object>() : (Map<String, Object>) new HashMap<String, Object>();
this.entries = (tree) ? new TreeMap<String, Object>() : new HashMap<String, Object>();
}
public void setLogger(final Logger newLogger) {
@ -170,16 +170,13 @@ public class AttrSeq {
public void toFile(final File out) throws IOException {
// generate header
final StringBuilder sb = new StringBuilder(2000);
sb.append("# Name=" + this.name); sb.append((char) 13); sb.append((char) 10);
sb.append("# Created=" + this.created); sb.append((char) 13); sb.append((char) 10);
sb.append("# Structure=" + this.structure.toString()); sb.append((char) 13); sb.append((char) 10);
sb.append("# Name="); sb.append(this.name); sb.append((char) 13); sb.append((char) 10);
sb.append("# Created="); sb.append(this.created); sb.append((char) 13); sb.append((char) 10);
sb.append("# Structure="); sb.append(this.structure.toString()); sb.append((char) 13); sb.append((char) 10);
sb.append("# ---"); sb.append((char) 13); sb.append((char) 10);
final Iterator<Map.Entry<String, Object>> i = entries.entrySet().iterator();
Map.Entry<String, Object> entry;
String k;
Object v;
while (i.hasNext()) {
entry = i.next();
for (final Map.Entry<String, Object> entry : entries.entrySet()) {
k = entry.getKey();
v = entry.getValue();
sb.append(k); sb.append('=');
@ -202,7 +199,7 @@ public class AttrSeq {
return new Entry(pivot, new HashMap<String, Long>(), (tree) ? (Set<String>) new TreeSet<String>() : (Set<String>) new HashSet<String>());
}
public Entry newEntry(final String pivot, final HashMap<String, Long> props, final Set<String> seq) {
public Entry newEntry(final String pivot, final Map<String, Long> props, final Set<String> seq) {
return new Entry(pivot, props, seq);
}
@ -322,6 +319,7 @@ public class AttrSeq {
seqrow = new Row(new String(rowdef), null);
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder(100);
sb.append('<'); sb.append(pivot_name); sb.append('-'); sb.append(Integer.toString(pivot_len)); sb.append(">,'=',");
@ -342,10 +340,10 @@ public class AttrSeq {
public class Entry {
String pivot;
HashMap<String, Long> attrs;
Map<String, Long> attrs;
Set<String> seq;
public Entry(final String pivot, final HashMap<String, Long> attrs, final Set<String> seq) {
public Entry(final String pivot, final Map<String, Long> attrs, final Set<String> seq) {
this.pivot = pivot;
this.attrs = attrs;
this.seq = seq;
@ -373,7 +371,7 @@ public class AttrSeq {
}
}
public HashMap<String, Long> getAttrs() {
public Map<String, Long> getAttrs() {
return attrs;
}
@ -408,6 +406,7 @@ public class AttrSeq {
this.seq.add(s/*, seqattrs*/);
}
@Override
public String toString() {
// creates only the attribute field and the sequence, not the pivot
final StringBuilder sb = new StringBuilder(100 + structure.seq_len[0] * seq.size());
@ -450,7 +449,7 @@ public class AttrSeq {
public static void main(final String[] args) {
// java -classpath source de.anomic.kelondro.kelondroPropFile -transcode DATA/RANKING/GLOBAL/CRG-test-unsorted-original.cr DATA/RANKING/GLOBAL/CRG-test-generated.cr
try {
if ((args.length == 3) && (args[0].equals("-transcode"))) {
if ((args.length == 3) && ("-transcode".equals(args[0]))) {
transcode(new File(args[1]), new File(args[2]));
}
} catch (final IOException e) {

@ -1,4 +1,4 @@
// indexDefaultReference.java
// Blacklist.java
// (C) 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 11.07.2005 on http://yacy.net
//
@ -23,7 +23,6 @@
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.repository;
import java.io.File;
@ -50,123 +49,145 @@ import net.yacy.kelondro.util.SetTools;
public class Blacklist {
public static final String BLACKLIST_DHT = "dht";
public static final String BLACKLIST_CRAWLER = "crawler";
public static final String BLACKLIST_PROXY = "proxy";
public static final String BLACKLIST_SEARCH = "search";
public static final String BLACKLIST_DHT = "dht";
public static final String BLACKLIST_CRAWLER = "crawler";
public static final String BLACKLIST_PROXY = "proxy";
public static final String BLACKLIST_SEARCH = "search";
public static final String BLACKLIST_SURFTIPS = "surftips";
public static final String BLACKLIST_NEWS = "news";
public static final String BLACKLIST_NEWS = "news";
public final static String BLACKLIST_FILENAME_FILTER = "^.*\\.black$";
public static final int ERR_TWO_WILDCARDS_IN_HOST = 1;
public static final int ERR_SUBDOMAIN_XOR_WILDCARD = 2;
public static final int ERR_PATH_REGEX = 3;
public static final int ERR_WILDCARD_BEGIN_OR_END = 4;
public static final int ERR_HOST_WRONG_CHARS = 5;
public static final int ERR_DOUBLE_OCCURANCE = 6;
public static final int ERR_HOST_REGEX = 7;
protected static final HashSet<String> BLACKLIST_TYPES = new HashSet<String>(Arrays.asList(new String[]{
Blacklist.BLACKLIST_CRAWLER,
Blacklist.BLACKLIST_PROXY,
Blacklist.BLACKLIST_DHT,
Blacklist.BLACKLIST_SEARCH,
Blacklist.BLACKLIST_SURFTIPS,
Blacklist.BLACKLIST_NEWS
}));
public static final String BLACKLIST_TYPES_STRING="proxy,crawler,dht,search,surftips,news";
/**
 * Result codes for the validation of a single blacklist entry.
 * Every constant carries a fixed numeric code that can be read as
 * {@code int} or {@code long}; {@link #NO_ERROR} has the code 0.
 */
public enum BlacklistError {

    NO_ERROR(0),
    TWO_WILDCARDS_IN_HOST(1),
    SUBDOMAIN_XOR_WILDCARD(2),
    PATH_REGEX(3),
    WILDCARD_BEGIN_OR_END(4),
    HOST_WRONG_CHARS(5),
    DOUBLE_OCCURANCE(6),
    HOST_REGEX(7);

    /** Numeric code of this error. */
    final int errorCode;

    BlacklistError(final int errorCode) {
        this.errorCode = errorCode;
    }

    /**
     * @return the numeric code of this error
     */
    public int getInt() {
        return errorCode;
    }

    /**
     * @return the numeric code of this error, widened to {@code long}
     */
    public long getLong() {
        // int widens to long implicitly, no cast needed
        return errorCode;
    }
}
protected static final Set<String> BLACKLIST_TYPES = new HashSet<String>(Arrays.asList(new String[]{
Blacklist.BLACKLIST_CRAWLER,
Blacklist.BLACKLIST_PROXY,
Blacklist.BLACKLIST_DHT,
Blacklist.BLACKLIST_SEARCH,
Blacklist.BLACKLIST_SURFTIPS,
Blacklist.BLACKLIST_NEWS
}));
public static final String BLACKLIST_TYPES_STRING = "proxy,crawler,dht,search,surftips,news";
protected File blacklistRootPath = null;
protected HashMap<String, HandleSet> cachedUrlHashs = null;
protected HashMap<String, HandleSet> cachedUrlHashs = null;
//protected HashMap<String, HashMap<String, ArrayList<String>>> hostpaths = null; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
protected HashMap<String, HashMap<String, ArrayList<String>>> hostpaths_matchable = null; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
protected HashMap<String, HashMap<String, ArrayList<String>>> hostpaths_notmatchable = null; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
/**
 * Creates a blacklist rooted at the given directory and prepares one empty
 * host/path map (matchable and not matchable) and one empty URL hash cache
 * for every entry of BLACKLIST_TYPES.
 *
 * @param rootPath the blacklist root path
 */
public Blacklist(final File rootPath) {
// NOTE(review): the root path is passed to setRootPath() and also assigned
// directly on the next line - confirm whether both statements are intended.
this.setRootPath(rootPath);
this.blacklistRootPath = rootPath;
// prepare the data structure
//this.hostpaths = new HashMap<String, HashMap<String, ArrayList<String>>>();
this.hostpaths_matchable = new HashMap<String, HashMap<String, ArrayList<String>>>();
this.hostpaths_notmatchable = new HashMap<String, HashMap<String, ArrayList<String>>>();
this.cachedUrlHashs = new HashMap<String, HandleSet>();
// NOTE(review): the iterator/while header and the for-each header below both
// walk BLACKLIST_TYPES; they look like the before/after forms of one loop -
// confirm which one is current.
final Iterator<String> iter = BLACKLIST_TYPES.iterator();
while (iter.hasNext()) {
final String blacklistType = iter.next();
for (final String blacklistType : BLACKLIST_TYPES) {
//this.hostpaths.put(blacklistType, new HashMap<String, ArrayList<String>>());
this.hostpaths_matchable.put(blacklistType, new HashMap<String, ArrayList<String>>());
this.hostpaths_notmatchable.put(blacklistType, new HashMap<String, ArrayList<String>>());
this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0));
}
}
}
public void setRootPath(final File rootPath) {
if (rootPath == null)
if (rootPath == null) {
throw new NullPointerException("The blacklist root path must not be null.");
if (!rootPath.isDirectory())
}
if (!rootPath.isDirectory()) {
throw new IllegalArgumentException("The blacklist root path is not a directory.");
if (!rootPath.canRead())
}
if (!rootPath.canRead()) {
throw new IllegalArgumentException("The blacklist root path is not readable.");
}
this.blacklistRootPath = rootPath;
}
protected HashMap<String, ArrayList<String>> getBlacklistMap(final String blacklistType,final boolean matchable) {
if (blacklistType == null) throw new IllegalArgumentException();
if (!BLACKLIST_TYPES.contains(blacklistType)) throw new IllegalArgumentException("Unknown blacklist type: "+blacklistType+".");
return (matchable)? this.hostpaths_matchable.get(blacklistType) : this.hostpaths_notmatchable.get(blacklistType);
protected Map<String, ArrayList<String>> getBlacklistMap(final String blacklistType, final boolean matchable) {
if (blacklistType == null) {
throw new IllegalArgumentException();
}
if (!BLACKLIST_TYPES.contains(blacklistType)) {
throw new IllegalArgumentException("Unknown blacklist type: " + blacklistType + ".");
}
return (matchable) ? this.hostpaths_matchable.get(blacklistType) : this.hostpaths_notmatchable.get(blacklistType);
}
/**
 * Looks up the URL hash cache of one blacklist type.
 *
 * @param blacklistType one of the entries of BLACKLIST_TYPES
 * @return the URL hash cache registered for the given type
 * @throws IllegalArgumentException if the type is {@code null} or unknown
 */
protected HandleSet getCacheUrlHashsSet(final String blacklistType) {
    if (blacklistType == null) {
        throw new IllegalArgumentException();
    }
    if (!BLACKLIST_TYPES.contains(blacklistType)) {
        // same wording as getBlacklistMap; the former text misspelled "blacklist"
        throw new IllegalArgumentException("Unknown blacklist type: " + blacklistType + ".");
    }
    return this.cachedUrlHashs.get(blacklistType);
}
public void clear() {
for (final HashMap<String, ArrayList<String>> entry: this.hostpaths_matchable.values()) {
for (final Map<String, ArrayList<String>> entry : this.hostpaths_matchable.values()) {
entry.clear();
}
for (final HashMap<String, ArrayList<String>> entry: this.hostpaths_notmatchable.values()) {
for (final Map<String, ArrayList<String>> entry : this.hostpaths_notmatchable.values()) {
entry.clear();
}
for (final HandleSet entry: this.cachedUrlHashs.values()) {
for (final HandleSet entry : this.cachedUrlHashs.values()) {
entry.clear();
}
}
public int size() {
int size = 0;
for(final String entry: this.hostpaths_matchable.keySet()) {
for(final ArrayList<String> ientry: this.hostpaths_matchable.get(entry).values()) {
for (final String entry : this.hostpaths_matchable.keySet()) {
for (final ArrayList<String> ientry : this.hostpaths_matchable.get(entry).values()) {
size += ientry.size();
}
}
for(final String entry: this.hostpaths_notmatchable.keySet()) {
for(final ArrayList<String> ientry: this.hostpaths_notmatchable.get(entry).values()) {
for (final String entry : this.hostpaths_notmatchable.keySet()) {
for (final ArrayList<String> ientry : this.hostpaths_notmatchable.get(entry).values()) {
size += ientry.size();
}
}
return size;
}
public void loadList(final BlacklistFile[] blFiles, final String sep) {
public void loadList(final BlacklistFile[] blFiles, final String sep) {
for (int j = 0; j < blFiles.length; j++) {
final BlacklistFile blf = blFiles[j];
loadList(blf.getType(), blf.getFileName(), sep);
}
}
private void loadList(final BlacklistFile blFile, final String sep) {
final HashMap<String, ArrayList<String>> blacklistMapMatch = getBlacklistMap(blFile.getType(),true);
final HashMap<String, ArrayList<String>> blacklistMapNotMatch = getBlacklistMap(blFile.getType(),false);
final Map<String, ArrayList<String>> blacklistMapMatch = getBlacklistMap(blFile.getType(), true);
final Map<String, ArrayList<String>> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false);
Set<Map.Entry<String, ArrayList<String>>> loadedBlacklist;
Map.Entry<String, ArrayList<String>> loadedEntry;
ArrayList<String> paths;
@ -180,21 +201,22 @@ public class Blacklist {
try {
file.createNewFile();
} catch (final IOException e) { /* */ }
// join all blacklists from files into one internal blacklist map
loadedBlacklist = SetTools.loadMapMultiValsPerKey(file.toString(), sep).entrySet();
for (final Iterator<Map.Entry<String, ArrayList<String>>> mi = loadedBlacklist.iterator(); mi.hasNext(); ) {
for (final Iterator<Map.Entry<String, ArrayList<String>>> mi = loadedBlacklist.iterator(); mi.hasNext();) {
loadedEntry = mi.next();
loadedPaths = loadedEntry.getValue();
// create new entry if host mask unknown, otherwise merge
// existing one with path patterns from blacklist file
paths = (isMatchable(loadedEntry.getKey())) ? blacklistMapMatch.get(loadedEntry.getKey()) : blacklistMapNotMatch.get(loadedEntry.getKey());
if (paths == null) {
if(isMatchable(loadedEntry.getKey()))
if (isMatchable(loadedEntry.getKey())) {
blacklistMapMatch.put(loadedEntry.getKey(), loadedPaths);
else
} else {
blacklistMapNotMatch.put(loadedEntry.getKey(), loadedPaths);
}
} else {
// TODO check for duplicates? (refactor List -> Set)
paths.addAll(loadedPaths);
@ -203,51 +225,63 @@ public class Blacklist {
}
}
}
/**
 * Loads the given file name(s) into the blacklist of the given type.
 * Kept so that the older plasmaURLPattern interface is not broken.
 *
 * @param blacklistType the blacklist type the entries are loaded for
 * @param fileNames the file name(s) wrapped into a BlacklistFile
 * @param sep the separator passed on to the loader
 */
public void loadList(final String blacklistType, final String fileNames, final String sep) {
    loadList(new BlacklistFile(fileNames, blacklistType), sep);
}
/**
 * Removes a host together with all of its paths from both the matchable
 * and the not matchable map of the given blacklist type.
 *
 * @param blacklistType the blacklist type to remove the host from
 * @param host the host key to remove
 */
public void removeAll(final String blacklistType, final String host) {
    final Map<String, ArrayList<String>> matchableHosts = getBlacklistMap(blacklistType, true);
    matchableHosts.remove(host);

    final Map<String, ArrayList<String>> notMatchableHosts = getBlacklistMap(blacklistType, false);
    notMatchableHosts.remove(host);
}
public void remove(final String blacklistType, final String host, final String path) {
final HashMap<String, ArrayList<String>> blacklistMap = getBlacklistMap(blacklistType,true);
final Map<String, ArrayList<String>> blacklistMap = getBlacklistMap(blacklistType, true);
ArrayList<String> hostList = blacklistMap.get(host);
if(hostList != null) {
if (hostList != null) {
hostList.remove(path);
if (hostList.isEmpty())
if (hostList.isEmpty()) {
blacklistMap.remove(host);
}
}
final HashMap<String, ArrayList<String>> blacklistMapNotMatch = getBlacklistMap(blacklistType,false);
final Map<String, ArrayList<String>> blacklistMapNotMatch = getBlacklistMap(blacklistType, false);
hostList = blacklistMapNotMatch.get(host);
if (hostList != null) {
hostList.remove(path);
if (hostList.isEmpty())
if (hostList.isEmpty()) {
blacklistMapNotMatch.remove(host);
}
}
}
/**
 * Adds a host/path entry to the blacklist of the given type. The entry
 * goes into the matchable or the not matchable map depending on
 * {@code isMatchable(host)}; the host is stored in lower case and a
 * single leading '/' is stripped from the path.
 *
 * @param blacklistType the blacklist type to add the entry to
 * @param host the host mask of the entry
 * @param path the path pattern of the entry
 * @throws NullPointerException if {@code host} or {@code path} is {@code null}
 */
public void add(final String blacklistType, String host, String path) {
    if (host == null) {
        throw new NullPointerException();
    }
    if (path == null) {
        throw new NullPointerException();
    }

    if (path.startsWith("/")) {
        path = path.substring(1);
    }

    final boolean matchable = isMatchable(host);
    final Map<String, ArrayList<String>> targetMap = getBlacklistMap(blacklistType, matchable);

    // avoid PatternSyntaxException: a not matchable host must not begin with a bare '*'
    if (!matchable && host.startsWith("*")) {
        host = "." + host;
    }

    final String hostKey = host.toLowerCase();
    ArrayList<String> paths = targetMap.get(hostKey);
    if (paths == null) {
        paths = new ArrayList<String>();
        targetMap.put(hostKey, paths);
    }
    paths.add(path);
}
@ -258,11 +292,11 @@ public class Blacklist {
final HandleSet blacklistMap = this.cachedUrlHashs.get(iter.next());
size += blacklistMap.size();
}
return size;
return size;
}
/**
 * Checks whether a URL hash is present in the URL hash cache of the given
 * blacklist type.
 *
 * @param blacklistType the blacklist type whose cache is consulted
 * @param urlHash the URL hash to look for
 * @return {@code true} if the cache has the hash
 */
public boolean hashInBlacklistedCache(final String blacklistType, final byte[] urlHash) {
    return getCacheUrlHashsSet(blacklistType).has(urlHash);
}
@ -270,21 +304,27 @@ public class Blacklist {
boolean ret = false;
if (blacklistType != null && host != null && path != null) {
HashMap<String, ArrayList<String>> blacklistMap;
blacklistMap = (isMatchable(host)) ? getBlacklistMap(blacklistType,true) : getBlacklistMap(blacklistType,false);
Map<String, ArrayList<String>> blacklistMap;
blacklistMap = (isMatchable(host)) ? getBlacklistMap(blacklistType, true) : getBlacklistMap(blacklistType, false);
// avoid PatternSyntaxException e
if (!isMatchable(host) && host.length() > 0 && host.charAt(0) == '*') host = "." + host;
if (!isMatchable(host) && host.length() > 0 && host.charAt(0) == '*') {
host = "." + host;
}
ArrayList<String> hostList = blacklistMap.get(host.toLowerCase());
if (hostList != null) ret = hostList.contains(path);
if (hostList != null) {
ret = hostList.contains(path);
}
}
return ret;
}
public boolean isListed(final String blacklistType, final DigestURI url) {
if (url.getHost() == null) return false;
final HandleSet urlHashCache = getCacheUrlHashsSet(blacklistType);
if (url.getHost() == null) {
return false;
}
final HandleSet urlHashCache = getCacheUrlHashsSet(blacklistType);
if (!urlHashCache.has(url.hash())) {
final boolean temp = isListed(blacklistType, url.getHost().toLowerCase(), url.getFile());
if (temp) {
@ -294,24 +334,30 @@ public class Blacklist {
Log.logException(e);
}
}
return temp;
}
return true;
return temp;
}
return true;
}
public static boolean isMatchable (final String host) {
public static boolean isMatchable(final String host) {
try {
if(Pattern.matches("^[a-z0-9.-]*$", host)) // simple Domain (yacy.net or www.yacy.net)
if (Pattern.matches("^[a-z0-9.-]*$", host)) // simple Domain (yacy.net or www.yacy.net)
{
return true;
if(Pattern.matches("^\\*\\.[a-z0-9-.]*$", host)) // start with *. (not .* and * must follow a dot)
}
if (Pattern.matches("^\\*\\.[a-z0-9-.]*$", host)) // start with *. (not .* and * must follow a dot)
{
return true;
if(Pattern.matches("^[a-z0-9-.]*\\.\\*$", host)) // ends with .* (not *. and befor * must be a dot)
}
if (Pattern.matches("^[a-z0-9-.]*\\.\\*$", host)) // ends with .* (not *. and befor * must be a dot)
{
return true;
}
} catch (final PatternSyntaxException e) {
//System.out.println(e.toString());
return false;
}
return false;
return false;
}
public String getEngineInfo() {
@ -319,27 +365,33 @@ public class Blacklist {
}
public boolean isListed(final String blacklistType, final String hostlow, String path) {
if (hostlow == null) throw new NullPointerException();
if (path == null) throw new NullPointerException();
if (hostlow == null) {
throw new NullPointerException();
}
if (path == null) {
throw new NullPointerException();
}
// getting the proper blacklist
final HashMap<String, ArrayList<String>> blacklistMapMatched = getBlacklistMap(blacklistType,true);
final Map<String, ArrayList<String>> blacklistMapMatched = getBlacklistMap(blacklistType, true);
if (path.length() > 0 && path.charAt(0) == '/') path = path.substring(1);
ArrayList<String> app;
if (path.length() > 0 && path.charAt(0) == '/') {
path = path.substring(1);
}
List<String> app;
boolean matched = false;
String pp = ""; // path-pattern
// try to match complete domain
if (!matched && (app = blacklistMapMatched.get(hostlow)) != null) {
for (int i=app.size()-1; !matched && i>-1; i--) {
for (int i = app.size() - 1; !matched && i > -1; i--) {
pp = app.get(i);
if (pp.indexOf("?*") > 0) {
// prevent "Dangling meta character '*'" exception
Log.logWarning("Blacklist", "ignored blacklist path to prevent 'Dangling meta character' exception: " + pp);
continue;
}
matched |= ((pp.equals("*")) || (path.matches(pp)));
matched |= (("*".equals(pp)) || (path.matches(pp)));
}
}
// first try to match the domain with wildcard '*'
@ -347,47 +399,48 @@ public class Blacklist {
int index = 0;
while (!matched && (index = hostlow.indexOf('.', index + 1)) != -1) {
if ((app = blacklistMapMatched.get(hostlow.substring(0, index + 1) + "*")) != null) {
for (int i=app.size()-1; !matched && i>-1; i--) {
for (int i = app.size() - 1; !matched && i > -1; i--) {
pp = app.get(i);
matched |= ((pp.equals("*")) || (path.matches(pp)));
matched |= (("*".equals(pp)) || (path.matches(pp)));
}
}
if ((app = blacklistMapMatched.get(hostlow.substring(0, index))) != null) {
for (int i=app.size()-1; !matched && i>-1; i--) {
for (int i = app.size() - 1; !matched && i > -1; i--) {
pp = app.get(i);
matched |= ((pp.equals("*")) || (path.matches(pp)));
matched |= (("*".equals(pp)) || (path.matches(pp)));
}
}
}
index = hostlow.length();
while (!matched && (index = hostlow.lastIndexOf('.', index - 1)) != -1) {
if ((app = blacklistMapMatched.get("*" + hostlow.substring(index, hostlow.length()))) != null) {
for (int i=app.size()-1; !matched && i>-1; i--) {
for (int i = app.size() - 1; !matched && i > -1; i--) {
pp = app.get(i);
matched |= ((pp.equals("*")) || (path.matches(pp)));
matched |= (("*".equals(pp)) || (path.matches(pp)));
}
}
if ((app = blacklistMapMatched.get(hostlow.substring(index +1, hostlow.length()))) != null) {
for (int i=app.size()-1; !matched && i>-1; i--) {
if ((app = blacklistMapMatched.get(hostlow.substring(index + 1, hostlow.length()))) != null) {
for (int i = app.size() - 1; !matched && i > -1; i--) {
pp = app.get(i);
matched |= ((pp.equals("*")) || (path.matches(pp)));
matched |= (("*".equals(pp)) || (path.matches(pp)));
}
}
}
// loop over all Regexentrys
if(!matched) {
final HashMap<String, ArrayList<String>> blacklistMapNotMatched = getBlacklistMap(blacklistType,false);
if (!matched) {
final Map<String, ArrayList<String>> blacklistMapNotMatched = getBlacklistMap(blacklistType, false);
String key;
for(final Entry<String, ArrayList<String>> entry: blacklistMapNotMatched.entrySet()) {
for (final Entry<String, ArrayList<String>> entry : blacklistMapNotMatched.entrySet()) {
key = entry.getKey();
try {
if(Pattern.matches(key, hostlow)) {
if (Pattern.matches(key, hostlow)) {
app = entry.getValue();
for (int i=0; i<app.size(); i++) {
if(Pattern.matches(app.get(i), path))
for (int i = 0; i < app.size(); i++) {
if (Pattern.matches(app.get(i), path)) {
return true;
}
}
}
} catch (final PatternSyntaxException e) {
@ -398,7 +451,7 @@ public class Blacklist {
return matched;
}
public int checkError(String element, Map<String, String> properties) {
public BlacklistError checkError(String element, Map<String, String> properties) {
boolean allowRegex = true;
int slashPos;
@ -422,36 +475,36 @@ public class Blacklist {
// check whether host begins illegally
if (!host.matches("([A-Za-z0-9_-]+|\\*)(\\.([A-Za-z0-9_-]+|\\*))*")) {
if (i == 0 && host.length() > 1 && host.charAt(1) != '.') {
return ERR_SUBDOMAIN_XOR_WILDCARD;
return BlacklistError.SUBDOMAIN_XOR_WILDCARD;
}
return ERR_HOST_WRONG_CHARS;
return BlacklistError.HOST_WRONG_CHARS;
}
// in host-part only full sub-domains may be wildcards
if (host.length() > 0 && i > -1) {
if (!(i == 0 || i == host.length() - 1)) {
return ERR_WILDCARD_BEGIN_OR_END;
return BlacklistError.WILDCARD_BEGIN_OR_END;
}
if (i == host.length() - 1 && host.length() > 1 && host.charAt(i - 1) != '.') {
return ERR_SUBDOMAIN_XOR_WILDCARD;
return BlacklistError.SUBDOMAIN_XOR_WILDCARD;
}
}
// check for double-occurences of "*" in host
if (host.indexOf("*", i + 1) > -1) {
return ERR_TWO_WILDCARDS_IN_HOST;
return BlacklistError.TWO_WILDCARDS_IN_HOST;
}
} else if (allowRegex && !isValidRegex(host)) {
return ERR_HOST_REGEX;
return BlacklistError.HOST_REGEX;
}
// check for errors on regex-compiling path
if (!isValidRegex(path) && !path.equals("*")) {
return ERR_PATH_REGEX;
return BlacklistError.PATH_REGEX;
}
return 0;
return BlacklistError.DOUBLE_OCCURANCE.NO_ERROR;
}
/**
@ -469,10 +522,12 @@ public class Blacklist {
}
return ret;
}
public static final String defaultBlacklist(final File listsPath) {
public static String defaultBlacklist(final File listsPath) {
List<String> dirlist = FileUtils.getDirListing(listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
if (dirlist.isEmpty()) return null;
if (dirlist.isEmpty()) {
return null;
}
return dirlist.get(0);
}
@ -490,5 +545,4 @@ public class Blacklist {
}
return ret;
}
}

Loading…
Cancel
Save