This file was deleted.

@@ -1,21 +1,126 @@
package DokumenteSucheforGUI;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Version;

public class SearchInDoc {
private Document Doc;
private List<IndexableField> IField;
private String FieldName;
private TermsEnum Terme;
private TokenStream TStream; // verarbeitet einzelne Buchstaben
private TokenFilter TFilter; // evtl. unnötig
private List<String> tokenList;

private Reader reader = null;
// https://lucene.apache.org/core/4_0_0/core/org/apache/lucene/index/DocsEnum.html

public SearchInDoc(List<IndexableField> iField, String fieldName) {
// TODO Auto-generated constructor stub#
IField = iField;
FieldName = fieldName;
Iterator iter = IField.iterator();
while (iter.hasNext()) {
IndexableField currentField = (IndexableField) iter.next();
// System.out.println("?????????????"+ currentField.name() + " "+
// currentField.stringValue());
if (currentField.name().equals(FieldName)) {
System.out.println("\n !!!!!!!Feld gefunden");
// reader=currentField.readerValue();
// Analyzer analyzer = new Analyzer();

try {
Analyzer analyzer2 = new StandardAnalyzer(Version.LUCENE_45); // Soll
// Query
// analysieren
// tokenArray =
// tokensFromAnalysis(analyzer2,currentField.toString() );

// TStream= currentField.tokenStream(analyzer2 ); redundant

public SearchInDoc(Document doc) {
super();
Doc = doc;
TStream = analyzer2.tokenStream(FieldName, new StringReader(currentField.stringValue()));
// System.out.println("\n \n Stringvalue " +
// currentField.stringValue() +"\n \n toString " +
// currentField.toString());
String text = currentField.stringValue();
OffsetAttribute offsetAttribute = TStream.getAttribute(OffsetAttribute.class);
CharTermAttribute termAttribute = TStream.getAttribute(CharTermAttribute.class);
// siehe:
// http://stackoverflow.com/questions/2638200/how-to-get-a-token-from-a-lucene-tokenstreamc
// i.V.m.
// http://stackoverflow.com/questions/2638200/how-to-get-a-token-from-a-lucene-tokenstream
// TermToBytesRefAttribute termAttribute =
// TStream.getAttribute(TermAttribute.class);
TStream.reset();
while (TStream.incrementToken()) {
// int startOffset = offsetAttribute.startOffset();
// int endOffset = offsetAttribute.endOffset();
String term = termAttribute.toString();
System.out.println(term);
try {
tokenList.add(term);
} catch (Exception e) {
} // hier muss noch Fleisch rein

}
TStream.end();
TStream.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
}

public int Anzahl(){
int i=0;



public String toString() throws NullPointerException {
int SucheFeld = 0;

return null;

}

public void close() {
if (reader != null) {
try {
reader.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}

public int Anzahl() {

int i = 0;
try {
i = tokenList.size();
} catch (Exception e) {

}
// i = reader.
// i = iField.length; Quatsch Länge ist immer zwei, da zwei Felder

return i;

}

}

Large diffs are not rendered by default.

@@ -4,11 +4,11 @@ public class DeleteApp {

/**
 * Deletes the directory "internOrdner" (resolved against the working
 * directory) and then prints "Success".
 *
 * The message is printed unconditionally: {@code DeleteDir.delete()} returns
 * nothing, so a failed deletion is not detected here.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String S = "internOrdner";
    DeleteDir d = new DeleteDir(S);
    d.delete();

    System.out.println("Success");
}

}
@@ -15,33 +15,31 @@
*
*/


/**
 * Deletes a file or a directory including everything below it.
 */
public class DeleteDir {
    String dir; // path of the directory to delete, handed over as a string

    /**
     * @param dr path of the file or directory that {@link #delete()} removes
     */
    public DeleteDir(String dr) {
        super();
        this.dir = dr;
    }

    /**
     * Deletes the configured path recursively. The outcome is not reported.
     */
    public void delete() {
        // A File object is only a path handle; creating one does not create anything on disk.
        deleteDirectory(new File(dir));
    }

    /**
     * Recursively deletes {@code path} and everything below it.
     *
     * @param path file or directory to remove
     * @return the result of the final {@code path.delete()} call
     */
    static private boolean deleteDirectory(File path) {
        if (path.exists()) {
            // listFiles() returns null for a regular file or on an I/O error.
            File[] files = path.listFiles();
            if (files != null) {
                for (File child : files) {
                    if (child.isDirectory()) {
                        deleteDirectory(child);
                    } else {
                        child.delete();
                    }
                }
            }
        }
        // \u00f6 is the letter o-umlaut; escaped so the source compiles under any file encoding.
        System.out.println("gel\u00f6scht du nob");
        return path.delete();
    }
}
@@ -2,49 +2,46 @@

import java.io.*;

/**
 * Reads a text file into a single string.
 *
 * The class used to be called FileReader, which was a bad idea because the
 * name clashes with java.io.FileReader.
 */
public class FReader {
    private String path = null;

    // Never updated by this class; getWords() therefore always returns 0.
    private int words = 0;

    public int getWords() {
        return words;
    }

    /**
     * @param path path of the file that {@link #getText()} reads
     */
    public FReader(String path) {
        super();
        this.path = path;
    }

    /**
     * Reads the whole file using the platform default charset.
     *
     * Lines are joined with a single '\n' so that the last word of one line
     * and the first word of the next stay separate; no trailing newline is
     * added.
     *
     * @return the file content ("" for an empty file), or {@code null} if the
     *         file could not be read (the error is printed to standard output)
     */
    public String getText() {
        // A StringBuilder avoids the old "null" prefix caused by `String S = null; S += ...`.
        StringBuilder text = new StringBuilder();
        BufferedReader br = null;
        try {
            br = new BufferedReader(new FileReader(path));
            String zeile = br.readLine();
            boolean firstLine = true;
            while (zeile != null) {
                if (!firstLine) {
                    text.append('\n');
                }
                text.append(zeile);
                firstLine = false;
                zeile = br.readLine();
            }
        } catch (IOException e) {
            System.out.println("Fehler beim Lesen der Datei " + path);
            System.out.println(e.toString());
            return null;
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException ignored) {
                    // Nothing sensible left to do if closing a reader fails.
                }
            }
        }
        return text.toString();
    }

}
@@ -6,23 +6,22 @@ public class IndexApp {

public static void main(String[] args) {
// TODO Auto-generated method stub

/*
* public Index(String fILES_TO_INDEX_DIRECTORY, String indexDir,
String fIELD_PATH, String fIELD_CONTENTS)
* public Index(String fILES_TO_INDEX_DIRECTORY, String indexDir, String
* fIELD_PATH, String fIELD_CONTENTS)
*/

IndexCreator i = new IndexCreator("C:/test/", "ersterTest", "INHALT");

/*Index(Ordner_mit_zu_indizierenden_Dateien, Dateipfad_Indey);*/

try{
NIOFSDirectory d = i.createIndex();
System.out.println(d.toString() + " - Erfolg ");
}
catch(Exception e){
System.out.println("Fehler");
e.printStackTrace();

IndexCreator i = new IndexCreator("C:/test/", "ersterTest", "INHALT");

/* Index(Ordner_mit_zu_indizierenden_Dateien, Dateipfad_Indey); */

try {
NIOFSDirectory d = i.createIndex();
System.out.println(d.toString() + " - Erfolg ");
} catch (Exception e) {
System.out.println("Fehler");
e.printStackTrace();
}
}

@@ -28,70 +28,78 @@
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.LockObtainFailedException;


import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.store.NoLockFactory;
import org.apache.lucene.util.Version;

public class IndexCreator {

/*
 * Several documents are read in.
 */
private String FILES_TO_INDEX_DIRECTORY; // directory the files are read from
private String IndexDir; // index directory
private String FIELD_PATH; // name of the field the analyzed file text is written to
// private String FIELD_CONTENTS ;

@SuppressWarnings("deprecation")
public NIOFSDirectory createIndex() throws CorruptIndexException, LockObtainFailedException, IOException {
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45); // brauchen wir den?? Englische Stoppwörter
public NIOFSDirectory createIndex() throws CorruptIndexException, LockObtainFailedException, IOException {
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45); // brauchen
// wir
// den??
// Englische
// Stoppwörter
boolean recreateIndexIfExists = true;


NoLockFactory noLockFactory = NoLockFactory.getNoLockFactory();
NIOFSDirectory indexDir = new NIOFSDirectory(new File(IndexDir), noLockFactory);//, new LockFactory()); // koorrekt
NIOFSDirectory indexDir = new NIOFSDirectory(new File(IndexDir), noLockFactory);// ,
// new
// LockFactory());
// //
// koorrekt

org.apache.lucene.index.IndexWriterConfig config = new org.apache.lucene.index.IndexWriterConfig(Version.LUCENE_45, analyzer);
org.apache.lucene.index.IndexWriterConfig config = new org.apache.lucene.index.IndexWriterConfig(
Version.LUCENE_45, analyzer);

// config.setWriteLockTimeout(1000);
// config.setIndexDeletionPolicy( IndexDeletionPolicy.); geht irgendwie nett
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); //vgl. http://stackoverflow.com/questions/17711347/avoid-indexing-documents-again-lucene

IndexWriter indexWriter = new IndexWriter(indexDir, config);//???
// config.setIndexDeletionPolicy( IndexDeletionPolicy.); geht irgendwie
// nett
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // vgl.
// http://stackoverflow.com/questions/17711347/avoid-indexing-documents-again-lucene

IndexWriter indexWriter = new IndexWriter(indexDir, config);// ???
File dir = new File(FILES_TO_INDEX_DIRECTORY);
File[] files = dir.listFiles();
for (File file : files) {
Document document = new Document();

String path = file.getCanonicalPath();
// document.add(new Field(FIELD_PATH, path, Field.Store.YES, Field.Index.NOT_ANALYZED));
// document.add(new Field(FIELD_PATH, path, Field.Store.YES,
// Field.Index.NOT_ANALYZED));
document.add(new Field("PFAD", path, Field.Store.YES, Field.Index.NOT_ANALYZED));


FReader fr = new FReader(path);
System.out.println(fr.getText());
// fieldTypee
// storeTermVectorPossition
// storeTermVectorPossition
// auslesen termPossitions Lucene
document.add(new Field(FIELD_PATH, fr.getText(), Field.Store.YES, Field.Index.ANALYZED));
// document.add(new Field("Words", (String) fr.getWords(), Field.Store.YES, Field.Index.NOT_ANALYZED));

// document.add(new Field("Words", (String) fr.getWords(),
// Field.Store.YES, Field.Index.NOT_ANALYZED));

System.out.println(document.toString());
indexWriter.addDocument(document);
}
// indexWriter.optimize(); nötig
indexWriter.close(); // hier Problem bei wiederholtem Aufruf
// indexDir.clearLock(FIELD_PATH);
//indexDir.close();
// indexWriter.optimize(); nötig
indexWriter.close(); // hier Problem bei wiederholtem Aufruf
// indexDir.clearLock(FIELD_PATH);
// indexDir.close();
System.out.println("Done - Index created");
return indexDir;
}

public IndexCreator(String fILES_TO_INDEX_DIRECTORY, String indexDir, String fIELD_PATH) {
super();
FILES_TO_INDEX_DIRECTORY = fILES_TO_INDEX_DIRECTORY;
@@ -67,8 +67,7 @@ private void read() {

try {

DocumentBuilderFactory dbFactory = DocumentBuilderFactory
.newInstance();
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();

DocumentBuilder builder;

@@ -84,11 +83,13 @@ private void read() {

org.jdom2.Element r = jDoc.getRootElement();

/* for (Iterator it = r.getDescendants(Filters.element());
it.hasNext();) {*/
/*
* for (Iterator it = r.getDescendants(Filters.element());
*
* it.hasNext();) {
*/
Iterator it = r.getDescendants(Filters.element());
while(it.hasNext()){
while (it.hasNext()) {

try {

@@ -102,7 +103,7 @@ private void read() {

retAr.add(S);// Achtung nur ArrayList

//n++; WTF, does it?
// n++; WTF, does it?

}

@@ -7,21 +7,22 @@ public class ParserApp {

public static void main(String[] args) {
// TODO Auto-generated method stub
Parser p = new Parser("src/stadien.xml", "location");

/*
in das erste Feld ist der relative Pfade zu dem zu parsendem xml-Dokument anzugeben
in das zweite Feld ist der auszulesende XML-TAG Anzugeb
*/
List <String> test;
test=p.getRetAr();

Parser p = new Parser("src/stadien.xml", "location");

/*
* in das erste Feld ist der relative Pfade zu dem zu parsendem
* xml-Dokument anzugeben in das zweite Feld ist der auszulesende
* XML-TAG Anzugeb
*/
List<String> test;
test = p.getRetAr();

ListIterator<String> ListenIterator = test.listIterator();
while(ListenIterator.hasNext()){

while (ListenIterator.hasNext()) {
System.out.println(ListenIterator.next());
}

}

}
@@ -4,7 +4,7 @@ public class Test {

/**
 * Prints "test" to standard output.
 *
 * @param args unused
 */
public static void main(String[] args) {
    System.out.println("test");
}

}
@@ -0,0 +1 @@
/de/
@@ -1,5 +1,5 @@
archivedir=C:/Users/ITDLZ-ROW/Desktop/crawler/RSSCrawler
sourcedir=C:/Users/ITDLZ-ROW/Desktop/crawler/sources
subscriptiondir=C:/Users/ITDLZ-ROW/Desktop/crawler/subscriptions
schemadir=C:/Users/ITDLZ-ROW/Desktop/crawler/schemas
archivedir=C:/Users/Christoph/git/PraktikumProgrammieren/crawler/RSSCrawler
sourcedir=C:/Users/Christoph/git/PraktikumProgrammieren/crawler/sources
subscriptiondir=C:/Users/Christoph/git/PraktikumProgrammieren/crawler/subscriptions
schemadir=C:/Users/Christoph/git/PraktikumProgrammieren/crawler/schemas

@@ -910,3 +910,11 @@ Tue, 24 Nov 2015 11:06:50 CET: class RSSThread, method processSubscription, Writ
Tue, 24 Nov 2015 14:36:03 CET: class DirectoryMonitor, method run, Unhandled exception: java.lang.NullPointerException
Tue, 24 Nov 2015 14:36:45 CET: class DirectoryMonitor, method run, Unhandled exception: java.lang.NullPointerException
Tue, 24 Nov 2015 14:36:45 CET: class DirectoryMonitor, method run, Unhandled exception: java.lang.NullPointerException
Wed, 25 Nov 2015 14:21:11 CET: class RSSThread, method readSubscription, Error message: org.xml.sax.SAXParseException; schema_reference.4: Failed to read schema document 'file:/C:/Users/Christoph/git/PraktikumProgrammieren/crawler/schemasRSSSubscription.xsd', because 1) could not find the document; 2) the document could not be read; 3) the root element of the document is not <xsd:schema>.
Wed, 25 Nov 2015 14:21:11 CET: class RSSThread, method readSourceFile, Error message: org.xml.sax.SAXParseException; schema_reference.4: Failed to read schema document 'file:/C:/Users/Christoph/git/PraktikumProgrammieren/crawler/schemasRSSFeeds.xsd', because 1) could not find the document; 2) the document could not be read; 3) the root element of the document is not <xsd:schema>.
Wed, 25 Nov 2015 14:21:11 CET: class RSSThread, method readSourceFile, Error message: org.xml.sax.SAXParseException; schema_reference.4: Failed to read schema document 'file:/C:/Users/Christoph/git/PraktikumProgrammieren/crawler/schemasRSSFeeds.xsd', because 1) could not find the document; 2) the document could not be read; 3) the root element of the document is not <xsd:schema>.
Wed, 25 Nov 2015 14:21:11 CET: class RSSThread, method readSourceFile, Error message: org.xml.sax.SAXParseException; schema_reference.4: Failed to read schema document 'file:/C:/Users/Christoph/git/PraktikumProgrammieren/crawler/schemasRSSFeeds.xsd', because 1) could not find the document; 2) the document could not be read; 3) the root element of the document is not <xsd:schema>.
Wed, 25 Nov 2015 14:21:11 CET: class RSSThread, method readSourceFile, Ignoring invalid URI: www.main-netz.de/rss/kultur.xml
Wed, 25 Nov 2015 14:21:11 CET: class RSSThread, method readSourceFile, Error message: org.xml.sax.SAXParseException; schema_reference.4: Failed to read schema document 'file:/C:/Users/Christoph/git/PraktikumProgrammieren/crawler/schemasRSSFeeds.xsd', because 1) could not find the document; 2) the document could not be read; 3) the root element of the document is not <xsd:schema>.
Wed, 25 Nov 2015 14:21:11 CET: class RSSThread, method readSourceFile, Error message: org.xml.sax.SAXParseException; schema_reference.4: Failed to read schema document 'file:/C:/Users/Christoph/git/PraktikumProgrammieren/crawler/schemasRSSFeeds.xsd', because 1) could not find the document; 2) the document could not be read; 3) the root element of the document is not <xsd:schema>.
Wed, 25 Nov 2015 14:21:11 CET: class RSSThread, method readSourceFile, Error message: org.xml.sax.SAXParseException; schema_reference.4: Failed to read schema document 'file:/C:/Users/Christoph/git/PraktikumProgrammieren/crawler/schemasRSSFeeds.xsd', because 1) could not find the document; 2) the document could not be read; 3) the root element of the document is not <xsd:schema>.
@@ -587,3 +587,9 @@ Tue, 24 Nov 2015 11:06:46 CET: class RSSThread, method run, Override interval wi
Tue, 24 Nov 2015 11:06:46 CET: class RSSThread, method run, First visit: http://www.spiegel.de/wissenschaft/index.rss
Tue, 24 Nov 2015 11:06:46 CET: class RSSThread, method run, First visit: http://newsfeed.zeit.de/politik/index
Tue, 24 Nov 2015 11:06:50 CET: class RSSThread, method run, First visit: http://rss.cnn.com/rss/edition_entertainment.rss
Wed, 25 Nov 2015 14:21:11 CET: class RSSThread, method update, Start to read new subscriptions
Wed, 25 Nov 2015 14:21:11 CET: class RSSThread, method update, Finish to read new subscriptions
Wed, 25 Nov 2015 14:21:11 CET: class RSSThread, method update, Start to read new sources
Wed, 25 Nov 2015 14:21:11 CET: class RSSThread, method update, Finish to read new sources
Wed, 25 Nov 2015 14:21:11 CET: class RSSThread, method run, RSS Server started!
Wed, 25 Nov 2015 14:21:11 CET: class RSSThread, method run, First visit: http://newsfeed.zeit.de/studium/index

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.