- restructuring to allow different import tasks to be controlled via one gui - adding possibility to import a single assortment file - adding possibility to set the cache size that should be used git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1504 6c8d7289-2bf4-0310-a012-ef5d649a1542pull/1/head
parent
56936139ae
commit
6a99304b2b
@ -0,0 +1,113 @@
|
|||||||
|
package de.anomic.plasma.dbImport;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
import de.anomic.plasma.plasmaSwitchboard;
|
||||||
|
import de.anomic.server.logging.serverLog;
|
||||||
|
|
||||||
|
public abstract class AbstractImporter extends Thread implements dbImporter{
|
||||||
|
|
||||||
|
protected int jobID;
|
||||||
|
protected String jobType;
|
||||||
|
protected serverLog log;
|
||||||
|
protected boolean stopped = false;
|
||||||
|
protected boolean paused = false;
|
||||||
|
|
||||||
|
protected plasmaSwitchboard sb;
|
||||||
|
protected File importPath;
|
||||||
|
protected int cacheSize;
|
||||||
|
|
||||||
|
protected long globalStart = System.currentTimeMillis();
|
||||||
|
protected long globalEnd;
|
||||||
|
protected String error;
|
||||||
|
|
||||||
|
public AbstractImporter(plasmaSwitchboard theSb) {
|
||||||
|
super(theSb.dbImportManager.runningJobs,"");
|
||||||
|
this.sb = theSb;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getError() {
|
||||||
|
return this.error;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void init(File theImportPath) {
|
||||||
|
this.importPath = theImportPath;
|
||||||
|
|
||||||
|
this.jobID = this.sb.dbImportManager.getJobID();
|
||||||
|
this.log = new serverLog("IMPORT_" + this.jobType + "_" + this.jobID);
|
||||||
|
this.setName("IMPORT_" + this.jobType + "_" + this.sb.dbImportManager.getJobID());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void startIt() {
|
||||||
|
this.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void stopIt() throws InterruptedException {
|
||||||
|
this.stopped = true;
|
||||||
|
this.continueIt();
|
||||||
|
this.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void pauseIt() {
|
||||||
|
synchronized(this) {
|
||||||
|
this.paused = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void continueIt() {
|
||||||
|
synchronized(this) {
|
||||||
|
if (this.paused) {
|
||||||
|
this.paused = false;
|
||||||
|
this.notifyAll();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isPaused() {
|
||||||
|
synchronized(this) {
|
||||||
|
return this.paused;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected boolean isAborted() {
|
||||||
|
synchronized(this) {
|
||||||
|
if (this.paused) {
|
||||||
|
try {
|
||||||
|
this.wait();
|
||||||
|
}
|
||||||
|
catch (InterruptedException e){}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (this.stopped) || Thread.currentThread().isInterrupted();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isStopped() {
|
||||||
|
return this.isAlive();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getJobID() {
|
||||||
|
return this.jobID;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getTotalRuntime() {
|
||||||
|
return (this.globalEnd == 0)?System.currentTimeMillis()-this.globalStart:this.globalEnd-this.globalStart;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getElapsedTime() {
|
||||||
|
return System.currentTimeMillis()-this.globalStart;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getJobType() {
|
||||||
|
return this.jobType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public File getImportPath() {
|
||||||
|
return this.importPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract long getEstimatedTime();
|
||||||
|
public abstract String getJobName();
|
||||||
|
public abstract int getProcessingStatusPercent();
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,115 @@
|
|||||||
|
package de.anomic.plasma.dbImport;
|
||||||
|
|
||||||
|
import java.util.Vector;
|
||||||
|
|
||||||
|
import de.anomic.plasma.plasmaSwitchboard;
|
||||||
|
import de.anomic.server.logging.serverLog;
|
||||||
|
|
||||||
|
public class dbImportManager {
|
||||||
|
|
||||||
|
public final Vector finishedJobs = new Vector();
|
||||||
|
public final ThreadGroup runningJobs = new ThreadGroup("ImporterThreads");
|
||||||
|
public int currMaxJobNr = 0;
|
||||||
|
private plasmaSwitchboard sb;
|
||||||
|
|
||||||
|
public dbImportManager(plasmaSwitchboard theSb) {
|
||||||
|
this.sb = theSb;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getJobID() {
|
||||||
|
int jobID;
|
||||||
|
synchronized(runningJobs) {
|
||||||
|
jobID = currMaxJobNr;
|
||||||
|
currMaxJobNr++;
|
||||||
|
}
|
||||||
|
return jobID;
|
||||||
|
}
|
||||||
|
|
||||||
|
public dbImporter[] getRunningImporter() {
|
||||||
|
Thread[] importThreads = new Thread[runningJobs.activeCount()*2];
|
||||||
|
int activeCount = runningJobs.enumerate(importThreads);
|
||||||
|
dbImporter[] importers = new dbImporter[activeCount];
|
||||||
|
for (int i=0; i<activeCount; i++) {
|
||||||
|
importers[i] = (dbImporter) importThreads[i];
|
||||||
|
}
|
||||||
|
return importers;
|
||||||
|
}
|
||||||
|
|
||||||
|
public dbImporter[] getFinishedImporter() {
|
||||||
|
return (dbImporter[]) finishedJobs.toArray(new dbImporter[finishedJobs.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
public dbImporter getImporterByID(int jobID) {
|
||||||
|
|
||||||
|
Thread[] importThreads = new Thread[this.runningJobs.activeCount()*2];
|
||||||
|
int activeCount = this.runningJobs.enumerate(importThreads);
|
||||||
|
|
||||||
|
for (int i=0; i < activeCount; i++) {
|
||||||
|
dbImporter currThread = (dbImporter) importThreads[i];
|
||||||
|
if (currThread.getJobID() == Integer.valueOf(jobID).intValue()) {
|
||||||
|
return currThread;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public dbImporter getNewImporter(String type) {
|
||||||
|
if (type == null) return null;
|
||||||
|
if (type.length() == 0) return null;
|
||||||
|
|
||||||
|
dbImporter newImporter = null;
|
||||||
|
if (type.equals("plasmaDB")) {
|
||||||
|
newImporter = new plasmaDbImporter(this.sb);
|
||||||
|
} else if (type.equalsIgnoreCase("ASSORTMENT")) {
|
||||||
|
newImporter = new plasmaWordIndexAssortmentImporter(this.sb);
|
||||||
|
}
|
||||||
|
return newImporter;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Can be used to close all still running importer threads
|
||||||
|
* e.g. on server shutdown
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
/* waiting for all threads to finish */
|
||||||
|
int threadCount = runningJobs.activeCount();
|
||||||
|
Thread[] threadList = new Thread[threadCount];
|
||||||
|
threadCount = runningJobs.enumerate(threadList);
|
||||||
|
|
||||||
|
if (threadCount == 0) return;
|
||||||
|
|
||||||
|
serverLog log = new serverLog("DB-IMPORT");
|
||||||
|
try {
|
||||||
|
// trying to gracefull stop all still running sessions ...
|
||||||
|
log.logInfo("Signaling shutdown to " + threadCount + " remaining dbImporter threads ...");
|
||||||
|
for ( int currentThreadIdx = 0; currentThreadIdx < threadCount; currentThreadIdx++ ) {
|
||||||
|
Thread currentThread = threadList[currentThreadIdx];
|
||||||
|
if (currentThread.isAlive()) {
|
||||||
|
((plasmaDbImporter)currentThread).stopIt();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// waiting a few ms for the session objects to continue processing
|
||||||
|
try { Thread.sleep(500); } catch (InterruptedException ex) {}
|
||||||
|
|
||||||
|
// interrupting all still running or pooled threads ...
|
||||||
|
log.logInfo("Sending interruption signal to " + runningJobs.activeCount() + " remaining dbImporter threads ...");
|
||||||
|
runningJobs.interrupt();
|
||||||
|
|
||||||
|
// we need to use a timeout here because of missing interruptable session threads ...
|
||||||
|
log.logFine("Waiting for " + runningJobs.activeCount() + " remaining dbImporter threads to finish shutdown ...");
|
||||||
|
for ( int currentThreadIdx = 0; currentThreadIdx < threadCount; currentThreadIdx++ ) {
|
||||||
|
Thread currentThread = threadList[currentThreadIdx];
|
||||||
|
if (currentThread.isAlive()) {
|
||||||
|
log.logFine("Waiting for dbImporter thread '" + currentThread.getName() + "' [" + currentThreadIdx + "] to finish shutdown.");
|
||||||
|
try { currentThread.join(500); } catch (InterruptedException ex) {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.logInfo("Shutdown of remaining dbImporter threads finished.");
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.logSevere("Unexpected error while trying to shutdown all remaining dbImporter threads.",e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,31 @@
|
|||||||
|
package de.anomic.plasma.dbImport;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
import de.anomic.plasma.plasmaSwitchboard;
|
||||||
|
|
||||||
|
public interface dbImporter {
|
||||||
|
|
||||||
|
// functions to pause and continue importing
|
||||||
|
public boolean isPaused();
|
||||||
|
public void pauseIt();
|
||||||
|
public void continueIt();
|
||||||
|
public void stopIt() throws InterruptedException;
|
||||||
|
public boolean isStopped();
|
||||||
|
|
||||||
|
// getting status information
|
||||||
|
public long getTotalRuntime();
|
||||||
|
public long getElapsedTime();
|
||||||
|
public long getEstimatedTime();
|
||||||
|
public int getProcessingStatusPercent();
|
||||||
|
|
||||||
|
public int getJobID();
|
||||||
|
public String getJobName();
|
||||||
|
public String getJobType();
|
||||||
|
public File getImportPath();
|
||||||
|
public String getError();
|
||||||
|
public String getStatus();
|
||||||
|
|
||||||
|
public void init(File importPath, int cacheSize);
|
||||||
|
public void startIt();
|
||||||
|
}
|
@ -0,0 +1,122 @@
|
|||||||
|
package de.anomic.plasma.dbImport;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import de.anomic.plasma.plasmaSwitchboard;
|
||||||
|
import de.anomic.plasma.plasmaWordIndex;
|
||||||
|
import de.anomic.plasma.plasmaWordIndexAssortment;
|
||||||
|
import de.anomic.plasma.plasmaWordIndexEntryContainer;
|
||||||
|
|
||||||
|
public class plasmaWordIndexAssortmentImporter extends AbstractImporter implements dbImporter{
|
||||||
|
|
||||||
|
private int importStartSize;
|
||||||
|
private int wordEntityCount = 0;
|
||||||
|
private int wordEntryCount = 0;
|
||||||
|
|
||||||
|
private File importAssortmentFile;
|
||||||
|
private plasmaWordIndexAssortment assortmentFile;
|
||||||
|
|
||||||
|
public plasmaWordIndexAssortmentImporter(plasmaSwitchboard sb) {
|
||||||
|
super(sb);
|
||||||
|
this.jobType = "ASSORTMENT";
|
||||||
|
}
|
||||||
|
|
||||||
|
public void init(File importAssortmentFile, int cacheSize) {
|
||||||
|
super.init(importAssortmentFile);
|
||||||
|
this.importAssortmentFile = importAssortmentFile;
|
||||||
|
this.cacheSize = cacheSize;
|
||||||
|
if (this.cacheSize < 2*1024*1024) this.cacheSize = 8*1024*1024;
|
||||||
|
|
||||||
|
String errorMsg = null;
|
||||||
|
if (!importAssortmentFile.getName().matches("indexAssortment0[0-6][0-9]\\.db")) errorMsg = "AssortmentFile '" + importAssortmentFile + "' has an invalid name.";
|
||||||
|
if (!importAssortmentFile.exists()) errorMsg = "AssortmentFile '" + importAssortmentFile + "' does not exist.";
|
||||||
|
else if (importAssortmentFile.isDirectory()) errorMsg = "AssortmentFile '" + importAssortmentFile + "' is a directory.";
|
||||||
|
else if (!importAssortmentFile.canRead()) errorMsg = "AssortmentFile '" + importAssortmentFile + "' is not readable.";
|
||||||
|
else if (!importAssortmentFile.canWrite()) errorMsg = "AssortmentFile '" + importAssortmentFile + "' is not writeable.";
|
||||||
|
|
||||||
|
|
||||||
|
File importAssortmentPath = null;
|
||||||
|
int assortmentNr = -1;
|
||||||
|
try {
|
||||||
|
importAssortmentPath = new File(importAssortmentFile.getParent());
|
||||||
|
assortmentNr = Integer.valueOf(importAssortmentFile.getName().substring("indexAssortment".length(),"indexAssortment".length()+3)).intValue();
|
||||||
|
if (assortmentNr <1 || assortmentNr > 64) {
|
||||||
|
errorMsg = "AssortmentFile '" + importAssortmentFile + "' has an invalid name.";
|
||||||
|
}
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
errorMsg = "Unable to parse the assortment file number.";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (errorMsg != null) {
|
||||||
|
this.log.logSevere(errorMsg);
|
||||||
|
throw new IllegalStateException(errorMsg);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
this.log.logInfo("Initializing source assortment file");
|
||||||
|
this.assortmentFile = new plasmaWordIndexAssortment(importAssortmentPath,assortmentNr,8*1024*1024, this.log);
|
||||||
|
this.importStartSize = this.assortmentFile.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getEstimatedTime() {
|
||||||
|
return (this.wordEntityCount==0)?0:this.assortmentFile.size()*((System.currentTimeMillis()-this.globalStart)/this.wordEntityCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getJobName() {
|
||||||
|
return this.getImportPath().toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getProcessingStatusPercent() {
|
||||||
|
return (this.wordEntityCount)/((this.importStartSize<100)?1:(this.importStartSize)/100);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getStatus() {
|
||||||
|
StringBuffer theStatus = new StringBuffer();
|
||||||
|
|
||||||
|
theStatus.append("#Word Entities=").append(this.wordEntityCount).append("\n");
|
||||||
|
theStatus.append("#Word Entries=").append(this.wordEntryCount);
|
||||||
|
|
||||||
|
return theStatus.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
Iterator contentIter = this.assortmentFile.content();
|
||||||
|
while (contentIter.hasNext()) {
|
||||||
|
this.wordEntityCount++;
|
||||||
|
|
||||||
|
byte[][] row = (byte[][]) contentIter.next();
|
||||||
|
String hash = new String(row[0]);
|
||||||
|
plasmaWordIndexEntryContainer container;
|
||||||
|
try {
|
||||||
|
container = this.assortmentFile.row2container(hash, row);
|
||||||
|
} catch (NullPointerException e) {
|
||||||
|
this.log.logWarning("NullpointerException detected in row with hash '" + hash + "'.");
|
||||||
|
if (this.wordEntityCount < this.importStartSize) continue;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.wordEntryCount += container.size();
|
||||||
|
|
||||||
|
// importing entity container to home db
|
||||||
|
this.sb.wordIndex.addEntries(container, true);
|
||||||
|
|
||||||
|
if (this.wordEntityCount % 500 == 0) {
|
||||||
|
this.log.logFine(this.wordEntityCount + " word entities processed so far.");
|
||||||
|
}
|
||||||
|
if (this.wordEntryCount % 2000 == 0) {
|
||||||
|
this.log.logFine(this.wordEntryCount + " word entries processed so far.");
|
||||||
|
}
|
||||||
|
if (isAborted()) break;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
this.error = e.toString();
|
||||||
|
this.log.logSevere("Error detected",e);
|
||||||
|
} finally {
|
||||||
|
this.globalEnd = System.currentTimeMillis();
|
||||||
|
this.sb.dbImportManager.finishedJobs.add(this);
|
||||||
|
this.assortmentFile.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in new issue