CSC662 ก
ก
WekaARFF
!. . ก "# $% $ &
%''(!$ '()!$ *"+#ก (,)
Weka
2
12,)3' '"
4 Weka
56#$78ก 6
$9ก )4,1#68
ก )4,1#6ก:13#
ก )4,1#6*4ก %)
ก )4,1#6ก ' ,ก กก";
ก #ก <,#' 6 Knowledge flow
Weka
3
Weka;*กWaikato Environment for Knowledge Analysis
Weka = )3*ก*;% GPL license
%>*)2#,
ก ก 6' 13#(Machine learning)ก ก )4,1#6(Data mining)
6;$4, *ก 6
GUI'4$3#ก $3#,
4 Weka
Weka
4
,กก Explorer Weka
6
Target table
6 ARFF
Filter
)3,$
1ก8ก 28 7
Weka
5
Simple CLI (Command Line Interface) = ก '4$3#ก )4# ;ก 77
Explorer = ก )3กก>( GUI
Experimenter = ก )3กก )#ก )$
KnowledgeFlow = ก ก #ก <,#
' 6
ArffViewer = ก )3$4, ก< %)
Arff
Log = ก )3;')?กก@ ,;#ก )4#
ก ,ก# Weka
Weka
6
,;#,ก# Weka
Weka
7
$; ก# Explorer
9#
6,ก "A713B6
"A1ก8ก
*ก ก6
$9 C**"
$ "6
($#
;#ก
(
$ "
ก>(
*4 )3 1ก
"A< Log ($#
ก
Weka
8
Preprocess ก 6
Classify 6ก )4,1#6*;# %)
Cluster 6ก )4,1#6ก กก";
Associate 6ก )4,1#6ก:13#
Select attributes 6$4, ก ' ,' ก37#ก>( *4
Visualize 4$6%78 $#
6,ก# Explorer
Weka
9
Log box $#)?กก ก# Weka
)2#, ' 7)3ก?2*$#$;2
Status box $#ก C**"#
Weka ก *#ก 7;<;
13ก$"A $ 91กก $#,;'*4)3
<
Bird icon $# 6กก 9ก กก*
< D2กก*3#D E
$; ก13# Explorer
Weka
10
6)3 #6; 6
ASCII *=arff, csv, C45 ก (66;' 1;
6$ 9 ก!
URL
, 1*6)36;5 6)313# ;JDBC
%)#6)3 <
Weka
11
ARFF = Attribute-Relation File Format
ก@ ASCII
@relation name = ))3ก13 #6#$78
@attribute att-name type = ))3ก13ก>( *4
• numeric , 1 real ,9?#ก>( *4ก@=
• {v1, v2, …, vn} ,9?#ก>( *4ก@';<;;13#
@data = ))3ก9?#9)3*=6 9,?3#
#ก>( *4)3ก<# '3'
6 Arff
Weka
12
@relation weather
@attribute outlook {sunny, overcast, rainy}
@attribute temperature real
@attribute humidity real
@attribute windy {TRUE, FALSE}
@attribute play {yes, no}
@data
sunny,85,85,FALSE,no
sunny,80,90,TRUE,no
overcast,83,86,FALSE,yes
rainy,70,96,FALSE,yes
rainy,68,80,FALSE,yes
rainy,65,70,TRUE,no
overcast,64,65,TRUE,yes
sunny,72,95,FALSE,no
sunny,69,70,FALSE,yes
rainy,75,80,FALSE,yes
sunny,75,70,TRUE,yes
overcast,72,90,TRUE,yes
overcast,81,75,FALSE,yes
rainy,71,91,TRUE,no
;# weather.arff
Weka
13
Explorer 13B weather.arff
ก # ก>( *4)3
1ก<
ก ,?3# #ก>(
*4)396ก1ก<
$ "';$9
# ก>(
*4)396ก 1ก
Weka
14
ก ; 78
*4ก>( *4)3)2#, 5 #4#2
outlook, temperature, humidity, windy, play *4 )2#, 14
$4, ก>( *4 outlook =ก>( *4 %)
Nominal )37* (<;6 )3,< ';)3 ก;#ก)2#, 3 '; ?3#;';<;7#';
';#ก>( *4'1 sunny 5 overcast 4
rainy 5
Weka
15
ก>( *413 E weather.arff
Weka
16
ก arff
ก ก $ # text file ก@< ; notepad++
) ก,$;
@relation relation_name
)9,$;ก>( *4 #4
@attribute att_name value
@data 6$; #4ก ก#ก>( *4
@data 1,2,3,4
Weka
17
ID,SEX,PASS/FAIL,Score,Class
1,M,Pass,45.5,B
2,F,Pass,56.78,B
3,M,Pass,89,A
4,F,Pass,77,A
5,M,Fail,32,C
6,F,Fail,12,D
7,M,Fail,35,C
8,F,Pass,62,B
9,M,Pass,68,B+
10,F,Fail,10,D
;#6 sample01.csv
Weka
18
,#B sample01.csv
07/31/07 Weka
19
$9,?3# (Univariate statistic) =ก ' ,$9)37#,?3#
ก>( *4 = ก
13#ก>( *4
#ก>( *4 7#$#'1 Nominal , 1 Numeric *46)3,<)= @ก*46)2#,
';)3ก;#ก)2#,ก>( *4
';)37#'; '= @)ก';)2#,
$9 Weka
07/31/07 Weka
20
13ก>( *4SEX
#';#ก>( *4=Nominal ก>( *42<;';)3,<
';)3ก;#ก7#$#';'1MกF ';)3=M*45
';)3=F*45
';)3<;24<;
;#$9#ก>( *4 sex
07/31/07 Weka
21
13ก>( *4Score
#';#ก>( *4=Numeric
ก>( *4<;6)3,<
*4';)3ก;#ก)2#,10
';;7#,?3# (<;';)324ก)
';34$"Minimum = 10 ';$6#$"Maximum = 89 ';D3'(Mean = 48.728 $;3# 5StdDev = 26.585
;#$9#ก>( *4 score
07/31/07 Weka
22
Weka $ 9$#ก "##;ก>(
*4ก );# ก)3 visualize all
ก #,?3#
07/31/07 Weka
23
Weka $#ก ,;#ก>(
*4$#ก>(9 visualize ก #ก; ก; Scatter plot ?3#;
*"$# ,?3# ;ก) ';#ก>( *4;';
#ก 3 PlotSize
3#*" 3
PointSize ก"A Update $#*"$13;'$)3 ก:6
ก #$#
07/31/07 Weka
24
Weka $ 91ก
$#ก #$#ก>(
*4'6; E
ก"A Select attributes
1กD7ก>( *4)3
#ก 7* ( (ก Ctrl ,;#1ก)
ก"A Update
ก 1ก$# Scatter plot
07/31/07 Weka
25
ก ,;#$# ,?3#'6;
1ก ก X )3
#ก
1ก ก Y )3
#ก
%7;#
)31ก $#< 4
07/31/07 Weka
26
Weka'4(,$ก 99#$ D7ก>( *4)3=*4
46 Weka
1ก9Classify
ก";#6Classifier กChoose1กC#ก(functions)1ก
LinearRegression
3Test options ,Use training set
1กก>( *4ก;#Test optionsD7)3(Num)#,
กStart
78*$#ก;#Classifier output
$ก 99$,$78
07/31/07 Weka
27
$ก 99#$# petallength
07/31/07 Weka
28
ก ()3ก>( *4)3#ก <;;*4 ,Logistic regression
46 Weka
1ก9Classify
ก;#Classifier ก Choose1กC#ก(functions)1กLogistic 3Test options =Use training set
1กก>( *4ก;#Test optionsD7)3=(Nom) ก Start
78*$#ก;#Classifier output
$ก 99$,$78
07/31/07 Weka
29
$ก 99#$# play
07/31/07 Weka
30
$ก 99#$# play
Weka
31
$ "
Weka %>* ก ',
' 6)3;6;6
2 * Explorer =,ก
6 6 ARFF
Filter ก #6)3#ก
1ก)3,$ก4,7 28
Weka $ 9B %) csv <