Enhanced the network scanner; it is now faster and more flexible

- start more processes
- remove superfluous host name resolution
- better/more flexible subnet ip range calculation
- preferring IPv4 yields more usable IP pre-settings in the servlet
- extended the servlet with a new /20 subnet option
- redesign of scanner start process in servlet (generalization)
pull/1/head
Michael Peter Christen 12 years ago
parent 592adf7ccb
commit d1cb4cbc84

@ -54,7 +54,7 @@
</dd> </dd>
<dt>Subnet</dt> <dt>Subnet</dt>
<dd> <dd>
<input type="radio" name="subnet" value="24" checked="checked"/>/24 <input type="radio" name="subnet" value="16"/>/16 <input type="radio" name="subnet" value="24" checked="checked"/>/24 (254 addresses) <input type="radio" name="subnet" value="20"/>/20 (4064 addresses) <input type="radio" name="subnet" value="16"/>/16 (65024 adresses)
</dd> </dd>
<dt>Scan Cache</dt> <dt>Scan Cache</dt>
<dd> <dd>

@ -23,6 +23,7 @@ import java.net.MalformedURLException;
import java.util.ConcurrentModificationException; import java.util.ConcurrentModificationException;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.TreeMap; import java.util.TreeMap;
@ -44,7 +45,7 @@ import net.yacy.server.serverSwitch;
public class CrawlStartScanner_p public class CrawlStartScanner_p
{ {
private final static int CONCURRENT_RUNNER = 100; private final static int CONCURRENT_RUNNER = 200;
public static serverObjects respond( public static serverObjects respond(
@SuppressWarnings("unused") final RequestHeader header, @SuppressWarnings("unused") final RequestHeader header,
@ -100,71 +101,38 @@ public class CrawlStartScanner_p
repeat_unit = post.get("repeat_unit", "selminutes"); // selminutes, selhours, seldays repeat_unit = post.get("repeat_unit", "selminutes"); // selminutes, selhours, seldays
} }
final boolean bigrange = post.get("subnet", "24").equals("16"); final int subnet = post.getInt("subnet", 24);
// case: an IP range was given; scan the range for services and display result // scan a range of ips
if ( post.containsKey("scan") && "hosts".equals(post.get("source", "")) ) { if (post.containsKey("scan")) {
final Set<InetAddress> ia = new HashSet<InetAddress>(); final Set<InetAddress> scanbase = new HashSet<InetAddress>();
// select host base to scan
if ("hosts".equals(post.get("source", ""))) {
for (String host: hosts.split(",")) { for (String host: hosts.split(",")) {
if ( host.startsWith("http://") ) { if (host.startsWith("http://")) host = host.substring(7);
host = host.substring(7); if (host.startsWith("https://")) host = host.substring(8);
} if (host.startsWith("ftp://")) host = host.substring(6);
if ( host.startsWith("https://") ) { if (host.startsWith("smb://")) host = host.substring(6);
host = host.substring(8);
}
if ( host.startsWith("ftp://") ) {
host = host.substring(6);
}
if ( host.startsWith("smb://") ) {
host = host.substring(6);
}
final int p = host.indexOf('/', 0); final int p = host.indexOf('/', 0);
if ( p >= 0 ) { if (p >= 0) host = host.substring(0, p);
host = host.substring(0, p); if (host.length() > 0) scanbase.add(Domains.dnsResolve(host));
}
ia.add(Domains.dnsResolve(host));
}
final Scanner scanner = new Scanner(ia, CONCURRENT_RUNNER, timeout);
if ( post.get("scanftp", "").equals("on") ) {
scanner.addFTP(bigrange);
}
if ( post.get("scanhttp", "").equals("on") ) {
scanner.addHTTP(bigrange);
}
if ( post.get("scanhttps", "").equals("on") ) {
scanner.addHTTPS(bigrange);
}
if ( post.get("scansmb", "").equals("on") ) {
scanner.addSMB(bigrange);
} }
scanner.start();
scanner.terminate();
if ( "on".equals(post.get("accumulatescancache", ""))
&& !"scheduler".equals(post.get("rescan", "")) ) {
Scanner.scancacheExtend(scanner);
} else {
Scanner.scancacheReplace(scanner);
} }
if ("intranet".equals(post.get("source", ""))) {
scanbase.addAll(Domains.myIntranetIPs());
} }
if ( post.containsKey("scan") && "intranet".equals(post.get("source", "")) ) { // start a scanner
final Scanner scanner = new Scanner(Domains.myIntranetIPs(), CONCURRENT_RUNNER, timeout); final Scanner scanner = new Scanner(scanbase, CONCURRENT_RUNNER, timeout);
if ( "on".equals(post.get("scanftp", "")) ) { List<InetAddress> addresses = scanner.genlist(subnet);
scanner.addFTP(bigrange); if ("on".equals(post.get("scanftp", ""))) scanner.addFTP(addresses);
} if ("on".equals(post.get("scanhttp", ""))) scanner.addHTTP(addresses);
if ( "on".equals(post.get("scanhttp", "")) ) { if ("on".equals(post.get("scanhttps", ""))) scanner.addHTTPS(addresses);
scanner.addHTTP(bigrange); if ("on".equals(post.get("scansmb", ""))) scanner.addSMB(addresses);
}
if ( "on".equals(post.get("scanhttps", "")) ) {
scanner.addHTTPS(bigrange);
}
if ( "on".equals(post.get("scansmb", "")) ) {
scanner.addSMB(bigrange);
}
scanner.start(); scanner.start();
scanner.terminate(); scanner.terminate();
if ( "on".equals(post.get("accumulatescancache", "")) if ("on".equals(post.get("accumulatescancache", "")) && !"scheduler".equals(post.get("rescan", ""))) {
&& !"scheduler".equals(post.get("rescan", "")) ) {
Scanner.scancacheExtend(scanner); Scanner.scancacheExtend(scanner);
} else { } else {
Scanner.scancacheReplace(scanner); Scanner.scancacheReplace(scanner);
@ -193,8 +161,7 @@ public class CrawlStartScanner_p
final byte[] pk = entry.getValue().substring(5).getBytes(); final byte[] pk = entry.getValue().substring(5).getBytes();
final DigestURI url = pkmap.get(pk); final DigestURI url = pkmap.get(pk);
if ( url != null ) { if ( url != null ) {
String path = String path = "/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99&directDocByURL=off";
"/Crawler_p.html?createBookmark=off&xsstopw=off&crawlingDomMaxPages=10000&intention=&range=domain&indexMedia=on&recrawl=nodoubles&xdstopw=off&storeHTCache=on&sitemapURL=&repeat_time=7&crawlingQ=on&cachePolicy=iffresh&indexText=on&crawlingMode=url&mustnotmatch=&crawlingDomFilterDepth=1&crawlingDomFilterCheck=off&crawlingstart=Start%20New%20Crawl&xpstopw=off&repeat_unit=seldays&crawlingDepth=99&directDocByURL=off";
path += "&crawlingURL=" + url.toNormalform(true); path += "&crawlingURL=" + url.toNormalform(true);
WorkTables.execAPICall( WorkTables.execAPICall(
Domains.LOCALHOST, Domains.LOCALHOST,

@ -993,7 +993,7 @@ public class Domains {
final Set<InetAddress> list = new HashSet<InetAddress>(); final Set<InetAddress> list = new HashSet<InetAddress>();
if (localHostAddresses.isEmpty()) return list; // give up if (localHostAddresses.isEmpty()) return list; // give up
for (final InetAddress a: localHostAddresses) { for (final InetAddress a: localHostAddresses) {
if ((0Xff & a.getAddress()[0]) == 127 || LOCAL_PATTERNS.matcher(a.getHostAddress()).matches()) continue; if ((0Xff & a.getAddress()[0]) == 127) continue;
list.add(a); list.add(a);
} }
return list; return list;

@ -212,7 +212,7 @@ public class Scanner extends Thread {
Service uri; Service uri;
try { try {
while ((uri = this.scanqueue.take()) != POISONSERVICE) { while ((uri = this.scanqueue.take()) != POISONSERVICE) {
Thread.currentThread().setName("Scanner Start Loop; now: " + uri.getHostName()); // good for debugging Thread.currentThread().setName("Scanner Start Loop; now: " + uri.getInetAddress()); // good for debugging
while (this.runner.size() >= this.runnerCount) { while (this.runner.size() >= this.runnerCount) {
/*for (Runner r: runner.keySet()) { /*for (Runner r: runner.keySet()) {
if (r.age() > 3000) synchronized(r) { r.interrupt(); } if (r.age() > 3000) synchronized(r) { r.interrupt(); }
@ -301,24 +301,24 @@ public class Scanner extends Thread {
} }
} }
public void addHTTP(final boolean bigrange) { public void addHTTP(final List<InetAddress> addresses) {
addProtocol(Protocol.http, bigrange); addProtocol(Protocol.http, addresses);
} }
public void addHTTPS(final boolean bigrange) { public void addHTTPS(final List<InetAddress> addresses) {
addProtocol(Protocol.https, bigrange); addProtocol(Protocol.https, addresses);
} }
public void addSMB(final boolean bigrange) { public void addSMB(final List<InetAddress> addresses) {
addProtocol(Protocol.smb, bigrange); addProtocol(Protocol.smb, addresses);
} }
public void addFTP(final boolean bigrange) { public void addFTP(final List<InetAddress> addresses) {
addProtocol(Protocol.ftp, bigrange); addProtocol(Protocol.ftp, addresses);
} }
private void addProtocol(final Protocol protocol, final boolean bigrange) { private void addProtocol(final Protocol protocol, final List<InetAddress> addresses) {
for (final InetAddress i: genlist(bigrange)) { for (final InetAddress i: addresses) {
try { try {
this.scanqueue.put(new Service(protocol, i)); this.scanqueue.put(new Service(protocol, i));
} catch (final InterruptedException e) { } catch (final InterruptedException e) {
@ -326,10 +326,16 @@ public class Scanner extends Thread {
} }
} }
private final List<InetAddress> genlist(final boolean bigrange) { /**
* generate a list of internetaddresses
* @param subnet the subnet: 24 will generate 254 addresses, 16 will generate 256 * 254; must be >= 16 and <= 24
* @return
*/
public final List<InetAddress> genlist(final int subnet) {
final ArrayList<InetAddress> c = new ArrayList<InetAddress>(10); final ArrayList<InetAddress> c = new ArrayList<InetAddress>(10);
for (final InetAddress i: this.scanrange) { for (final InetAddress i: this.scanrange) {
for (int br = bigrange ? 1 : i.getAddress()[2]; br < (bigrange ? 255 : i.getAddress()[2] + 1); br++) { int ul = subnet >= 24 ? i.getAddress()[2] : (1 << (24 - subnet)) - 1;
for (int br = subnet >= 24 ? i.getAddress()[2] : 0; br <= ul; br++) {
for (int j = 1; j < 255; j++) { for (int j = 1; j < 255; j++) {
final byte[] address = i.getAddress(); final byte[] address = i.getAddress();
address[2] = (byte) br; address[2] = (byte) br;
@ -358,10 +364,11 @@ public class Scanner extends Thread {
public static void main(final String[] args) { public static void main(final String[] args) {
//try {System.out.println("192.168.1.91: " + ping(new MultiProtocolURI("smb://192.168.1.91/"), 1000));} catch (MalformedURLException e) {} //try {System.out.println("192.168.1.91: " + ping(new MultiProtocolURI("smb://192.168.1.91/"), 1000));} catch (MalformedURLException e) {}
final Scanner scanner = new Scanner(100, 10); final Scanner scanner = new Scanner(100, 10);
scanner.addFTP(false); List<InetAddress> addresses = scanner.genlist(20);
scanner.addHTTP(false); scanner.addFTP(addresses);
scanner.addHTTPS(false); scanner.addHTTP(addresses);
scanner.addSMB(false); scanner.addHTTPS(addresses);
scanner.addSMB(addresses);
scanner.start(); scanner.start();
scanner.terminate(); scanner.terminate();
for (final Service service: scanner.services().keySet()) { for (final Service service: scanner.services().keySet()) {

@ -40,6 +40,8 @@ import java.nio.channels.FileLock;
import java.util.Properties; import java.util.Properties;
import java.util.concurrent.Semaphore; import java.util.concurrent.Semaphore;
import sun.security.action.GetBooleanAction;
import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.lod.JenaTripleStore; import net.yacy.cora.lod.JenaTripleStore;
import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.ClientIdentification;
@ -597,6 +599,7 @@ public final class yacy {
if (OS.isWindows) headless = false; if (OS.isWindows) headless = false;
if (args.length >= 1 && args[0].toLowerCase().equals("-gui")) headless = false; if (args.length >= 1 && args[0].toLowerCase().equals("-gui")) headless = false;
System.setProperty("java.awt.headless", headless ? "true" : "false"); System.setProperty("java.awt.headless", headless ? "true" : "false");
System.setProperty("java.net.preferIPv4Stack", "true");
String s = ""; for (final String a: args) s += a + " "; String s = ""; for (final String a: args) s += a + " ";
yacyRelease.startParameter = s.trim(); yacyRelease.startParameter = s.trim();

Loading…
Cancel
Save