https://github.com/Waikato

https://www.cs.waikato.ac.nz/ml/weka/

In [1]:
// @file:DependsOn("nz.ac.waikato.cms.weka:weka-stable:3.8.6")
@file:DependsOn("nz.ac.waikato.cms.weka:weka-dev:3.9.6")
import weka.core.*
import weka.core.converters.ConverterUtils.*
import weka.core.converters.*
import weka.filters.*
import weka.filters.unsupervised.attribute.*

In [6]:
// Open the CPU dataset's ARFF file through Weka's DataSource helper and read it fully.
val source = DataSource("data/weka/cpu.arff")
val instances = source.dataSet

// Print, in order: the row count, the per-attribute summary table, and the full ARFF text.
listOf(
    instances.numInstances(),
    instances.toSummaryString(),
    instances
).forEach { println(it) }

209
Relation Name:  cpu
Num Instances:  209
Num Attributes: 7

     Name                      Type  Nom  Int Real     Missing      Unique  Dist
1 MYCT                       Num   0% 100%   0%     0 /  0%    19 /  9%    60 
2 MMIN                       Num   0% 100%   0%     0 /  0%    11 /  5%    25 
3 MMAX                       Num   0% 100%   0%     0 /  0%     6 /  3%    23 
4 CACH                       Num   0% 100%   0%     0 /  0%     4 /  2%    22 
5 CHMIN                      Num   0% 100%   0%     0 /  0%     4 /  2%    15 
6 CHMAX                      Num   0% 100%   0%     0 /  0%     9 /  4%    31 
7 class                      Num   0% 100%   0%     0 /  0%    72 / 34%   116 

@relation cpu

@attribute MYCT numeric
@attribute MMIN numeric
@attribute MMAX numeric
@attribute CACH numeric
@attribute CHMIN numeric
@attribute CHMAX numeric
@attribute class numeric

@data
125,256,6000,256,16,128,198
29,8000,32000,32,8,32,269
29,8000,32000,32,8,32,220
29,8000,32000,32,8,32,172
29,

In [24]:
// Build a four-attribute dataset in memory (nominal, numeric, string, date),
// add one row, print it as ARFF, then write it to disk one instance at a time.

// Create the java.util.ArrayList directly instead of down-casting the
// MutableList returned by mutableListOf(), which only works by implementation accident.
val attributes = arrayListOf<Attribute>()

// att1: nominal attribute restricted to the listed labels.
val catVals = mutableListOf("sports", "finance", "news")
attributes.add(Attribute("category(att1)", catVals))

// att2: numeric attribute.
attributes.add(Attribute("visits(att2)"))

// att3: string attribute; the typed null picks the List<String> constructor overload.
attributes.add(Attribute("title(att3)", null as List<String>?))

// att4: date attribute parsed/printed with the given pattern.
attributes.add(Attribute("posted(att4)", "yyyy-MM-dd"))

// Empty dataset (initial capacity 0) with the header defined above.
val data = Instances("Runtime dataset", attributes, 0)

// Every cell of the row is stored as a double: a label index for the nominal
// attribute, the value returned by addStringValue for the string attribute,
// and the value returned by parseDate for the date attribute.
val vals = doubleArrayOf(
    data.attribute(0).indexOfValue("sports").toDouble(),
    8527.0,
    data.attribute(2).addStringValue("2012 Summer").toDouble(),
    data.attribute(3).parseDate("2012-07-27")
)
data.add(DenseInstance(1.0, vals))

println(data.toString())

val saver = ArffSaver()
// Batch alternative (writes the whole dataset in one call):
// saver.setInstances(data)
// saver.setFile(java.io.File("data/weka/runtime.arff"))
// saver.writeBatch()

// Incremental mode: hand over the dataset header, then write row by row.
saver.setRetrieval(ArffSaver.INCREMENTAL)
saver.setInstances(data)
saver.setFile(java.io.File("data/weka/runtime-incremental.arff"))
for (i in 0 until data.numInstances()) {
    saver.writeIncremental(data.instance(i))
}
// A final null marks the end of the data so the saver can finish the file
// (Weka's incremental-saving convention).
saver.writeIncremental(null)

@relation 'Runtime dataset'

@attribute category(att1) {sports,finance,news}
@attribute visits(att2) numeric
@attribute title(att3) string
@attribute posted(att4) date yyyy-MM-dd

@data
sports,8527,'2012 Summer',2012-07-27


In [27]:
// Drop the second attribute ("visits(att2)") from the runtime dataset with Weka's Remove filter.
// "-R 2" is the attribute range to remove (1-based).
val opts = arrayOf("-R", "2")

// Options must be set before the input format is supplied to the filter.
val remove = Remove().apply {
    setOptions(opts)
    setInputFormat(data)
}

// Run the configured filter over the whole dataset and show the filtered copy.
val newData = Filter.useFilter(data, remove)
println(newData)

@relation 'Runtime dataset-weka.filters.unsupervised.attribute.Remove-R2'

@attribute category(att1) {sports,finance,news}
@attribute title(att3) string
@attribute posted(att4) date yyyy-MM-dd

@data
sports,'2012 Summer',2012-07-27


# 可视化

In [1]:
// Scratch cell: takes a Java tree-visualizer snippet that was pasted as a single line
// and inserts a newline after every ';' so the cell result shows one statement per line.
// The string is only reformatted here, never executed.
"""import java.awt.BorderLayout;import java.io.BufferedReader;import java.io.FileReader;import weka.classifiers.trees.J48;import weka.core.Instances;import weka.gui.treevisualizer.PlaceNode2;import weka.gui.treevisualizer.TreeVisualizer;...Instances data = new Instances(new BufferedReader(new FileReader(dataset)));data.setClassIndex(data.numAttributes() - 1);J48 classifier = new J48();classifier.buildClassifier(data);TreeVisualizer tv = new TreeVisualizer(null, classifier.graph(), new PlaceNode2());JFrame frame = new javax.swing.JFrame("Tree Visualizer");frame.setSize(800, 500);frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);frame.getContentPane().add(tv);frame.setVisible(true);tv.fitToScreen();"""
.replace(";",";\n")

import java.awt.BorderLayout;
import java.io.BufferedReader;
import java.io.FileReader;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.gui.treevisualizer.PlaceNode2;
import weka.gui.treevisualizer.TreeVisualizer;
...Instances data = new Instances(new BufferedReader(new FileReader(dataset)));
data.setClassIndex(data.numAttributes() - 1);
J48 classifier = new J48();
classifier.buildClassifier(data);
TreeVisualizer tv = new TreeVisualizer(null, classifier.graph(), new PlaceNode2());
JFrame frame = new javax.swing.JFrame("Tree Visualizer");
frame.setSize(800, 500);
frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
frame.getContentPane().add(tv);
frame.setVisible(true);
tv.fitToScreen();


# swing显示树状图

In [2]:
import java.awt.BorderLayout;
import javax.swing.JFrame;
import java.io.BufferedReader;
import java.io.FileReader;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.gui.treevisualizer.PlaceNode2;
import weka.gui.treevisualizer.TreeVisualizer;
// Train a J48 decision tree on the glass dataset and show it in a Swing window.

// Read the ARFF file; `use` closes the reader once the Instances constructor returns.
val data = BufferedReader(FileReader("data/weka/glass.arff")).use { Instances(it) }
// Use the last attribute as the class to predict.
data.setClassIndex(data.numAttributes() - 1)

val classifier = J48()
classifier.buildClassifier(data)

// classifier.graph() supplies the tree description that the visualizer renders.
val tv = TreeVisualizer(null, classifier.graph(), PlaceNode2())

val frame = javax.swing.JFrame("Tree Visualizer")
frame.setSize(800, 500)
// DISPOSE_ON_CLOSE rather than EXIT_ON_CLOSE: EXIT_ON_CLOSE calls System.exit,
// which would terminate the notebook kernel when the window is closed.
frame.setDefaultCloseOperation(javax.swing.WindowConstants.DISPOSE_ON_CLOSE)
frame.getContentPane().add(tv)
frame.setVisible(true)
// Scale the tree to the window only after the frame is visible.
tv.fitToScreen()