Saran untuk penelitian akhir ini adalah :
1. Data sekuen protein ditambah lebih banyak, terutama data
sekuen protein sehat, serta ditambahkan informasi tingkat stadium untuk setiap data sekuen protein kanker payudara dan paru-paru.
2. Range nilai dalam atribut disederhanakan dengan interval nilai
atau kategorisasi.
3. Menggunakan pre-processing dan ekstraksi ciri yang berbeda
untuk penelitian selanjutnya.
4. Pada algoritma decision tree dapat ditambah method prune untuk
DAFTAR PUSTAKA
Branislava Gemovic, Vladimir Perovic, et al. Feature-Based Classification of
Amino Acid Substitutions outside Conserved Functional Protein Domains. Hindawi Publishing Corporation The Scientific World Journal.
F.Z. Chelali, K. Sadeddine, et al. Speaker identification system using LPC - Application on Berber language. HD-SKD International Journal.
Han, Jiawei and Micheline Kamber. Data Mining: Concepts and Techniques 2nd edition. 2006. Morgan Kaufmann. San Francisco, CA.
http://labrosa.ee.columbia.edu/matlab/rastamat/dolpc.m. Diakses pada tanggal 5 Mei 2014.
http://www.uniprot.org/uniprot/. Diakses pada tanggal 20 november 2013
Kementerian Kesehatan RI. infoDatin, Pusat Data dan Informasi Kementrian Kesehatan RI, STOP KANKER. 2015. Jakarta.
Luciana Kuswibawati. Kanker. 2000. Universitas Sanata Dharma. Yogyakarta.
Mitra, Sushmita dan Tinku Acharya. Data Mining : multimedia, soft computing, and bioinformatics. 2003. John Wiley & Sons, Inc.. New Jersey.
Nandra Pradipta. Implementasi Algoritma FFT (Fast Fourier Transform) Pada Digital Signal Processor (DSP) TMS320C542. Jurnal Universitas Diponegoro
Novi Arya Kurnianto. Penentuan Jenis Kelamin Itik dengan Metode Dynamic Time Warping (DTW). Jurnal Universitas Diponegoro.
Pal, Sankar K. dan Pabitra Mitra. Pattern Recognition Algorithms for Data Mining. 2004. CRC Press LLC.USA.
Prabowo Pudjo Widodo, et al. Penerapan Data Mining dengan Matlab. 2013. Rekayasa Sains. Bandung.
Putra, Dharma. Pengolahan Citra Digital. 2010. Andi Offset. Yogyakarta.
Ria K., Marji, Widodo. Penggunaan Metode Pengelompokkan K-Means pada Klasifikasi KNN Untuk Penentuan Jenis Kanker Berdasarkan Susunan Protein. Jurnal Universitas Brawijaya.
Rizki. 1 Desember 2008. Bioinformatika. http://bioinformatika-q.blogspot.co.id. Diakses pada tanggal 28 November 2013.
Santoso, Budi. DATA MINING : Teknik Pemanfaatan Data untuk Keperluan Bisnis. 2007. Graha Ilmu. Yogyakarta.
Sri Hartanti Yuliana. Kanker. 2000. Universitas Sanata Dharma. Yogyakarta.
St. Clair, Caroline dan Jonathan Visick. Exploring Bioinformatics: A Project-Based Approach. 2010. Jones and Bartlett Publishers. USA.
Tim Kumpulan Istilah. 2013. Pengertian kanker paru-paru.
http://www.kumpulanistilah.com. Diakses pada tanggal 28 November 2013.
Wahab Naira S. 25 Agustus 2015. Mengapa Kanker Paru dan Kanker Payudara
Tertinggi di Indonesia?. http://www.kompasiana.com. Diakses pada tanggal
8 November 2015.
Wirahadikusuma, Muhamad. Biokimia “protein, enzim dan asam nukleat”. 1977.
LAMPIRAN I : Code Program
1. File Preprosesing.mat
function [f,databaru] = preprosses(x)
%PREPROSSES Convert one protein FASTA file into an LPC-based feature vector.
%   [f, databaru] = preprosses(x) reads the FASTA file at path x, maps each
%   residue letter of its sequence to a number, takes the FFT of that
%   numeric signal, and passes the FFT to dolpc (model order 8) followed by
%   two rounds of deltas (window 8).
%
%   x         path of the FASTA file to read
%   f         the feature matrix (identical to databaru)
%   databaru  dolpc output stacked on top of its deltas and delta-deltas
%
%   Side effects: plots the FFT magnitude and the feature vector into the
%   current figure.

% Read the FASTA file and keep only the sequence.
a = fastaread(x);
s = a.Sequence;
l = length(s);

% Residue -> number table (same 20 letter/value pairs as the original
% if/elseif chain). NOTE(review): these look like EIIP values - confirm.
residues = 'LINGVEPHKAYWQMSCTFRD';
values   = [0.0000 0.0000 0.0036 0.0050 0.0057 0.0058 0.0198 0.0242 ...
            0.0371 0.0373 0.0516 0.0548 0.0761 0.0823 0.0829 0.0829 ...
            0.0941 0.0954 0.0956 0.1263];

% Preallocate so ds always has one entry per residue. Letters outside the
% table (e.g. 'X', 'U') stay 0; without preallocation a sequence ending in
% such a letter produced a short ds and timeBase(1:l) below went out of
% bounds.
ds = zeros(1, l);
[known, pos] = ismember(s, residues);
ds(known) = values(pos(known));

timeBase = fft(ds);
plot(1:l,abs(timeBase(1:l)));figure(gcf);

% NOTE(review): dolpc's header says it expects spectral magnitude samples,
% but the complex FFT is passed here - confirm this is intended.
lpcCoef = dolpc(timeBase,8);   % renamed from "lpc" to avoid shadowing the toolbox function

% NOTE(review): deltas pads by repeating the first/last column and filters
% along dimension 2, so for a single-column input it returns all zeros.
deltalpc = deltas(lpcCoef,8);
deltadeltalpc = deltas(deltalpc,8);

databaru = [lpcCoef; deltalpc; deltadeltalpc];
plot(1:size(databaru,1),databaru);
stem(databaru,'DisplayName','databaru');figure(gcf)
f = databaru;
end
2. foldSaya.mat
function [Training1, Testing1, Training2, Testing2, Training3, Testing3] = foldSaya( Hasilf )
%FOLDSAYA Split the rows of Hasilf into three cross-validation folds.
%   Hasilf is a matrix with one observation per row. For fold i, TrainingI
%   holds the rows cvpartition marks as training and TestingI the rows it
%   marks as test. cvpartition assigns rows randomly, so the split differs
%   between calls unless the random generator is seeded by the caller.
N = size(Hasilf,1);
k = 3;
c1 = cvpartition(N,'kfold',k);

% c1.training(i) / c1.test(i) are logical masks, so they index directly.
Training1 = Hasilf(c1.training(1), :);
Testing1  = Hasilf(c1.test(1), :);
Training2 = Hasilf(c1.training(2), :);
Testing2  = Hasilf(c1.test(2), :);
Training3 = Hasilf(c1.training(3), :);
Testing3  = Hasilf(c1.test(3), :);
end
3. doLPC.mat
function y = dolpc(x,modelorder)
%y = dolpc(x,modelorder)
%
% compute autoregressive model from spectral magnitude samples
%
% rows(x) = critical band
% col(x) = frame
%
% row(y) = lpc a_i coeffs, scaled by gain
% col(y) = frame
%
% modelorder is order of model, defaults to 8
nbands = size(x,1);
if nargin < 2
    modelorder = 8;
end
% Calculate autocorrelation: mirror the bands (without repeating the two
% end points) and take the real part of the inverse FFT.
r = real(ifft([x;x([(nbands-1):-1:2],:)]));
% First half only
r = r(1:nbands,:);
% Find LPC coeffs by durbin
[y,e] = levinson(r, modelorder);
% Normalize each poly by gain
y = y'./repmat(e',(modelorder+1),1);
end
4. deltas.mat
function d = deltas(x, w)
% D = deltas(X,W)  Calculate the deltas (derivatives) of a sequence
%    Use a W-point window (W odd, default 9) to calculate deltas using a
%    simple linear slope.  This mirrors the delta calculation performed
%    in feacalc etc.  Each row of X is filtered separately.
% 2003-06-30 dpwe@ee.columbia.edu
if nargin < 2
    w = 9;
end   % this "end" was missing in the listing, leaving the if unbalanced

nc = size(x,2);
if nc == 0
    % empty vector passed in; return empty vector
    d = x;
else
    % actually calculate deltas
    % Define window shape; floor(w/2) makes the effective window odd
    % (2*hlen + 1 taps) even when w is even.
    hlen = floor(w/2);
    win = hlen:-1:-hlen;
    % pad data by repeating first and last columns
    xx = [repmat(x(:,1),1,hlen),x,repmat(x(:,end),1,hlen)];
    % Apply the delta filter
    d = filter(win, 1, xx, [], 2);  % filter along dim 2 (rows)
    % Trim edges
    d = d(:,2*hlen + [1:nc]);
end
5. ent.mat
function result = ent(Y)
% Calculates the entropy (base 2) of a vector of values.
%   Y       numeric vector of observed values
%   result  -sum(p .* log2(p)) over the relative frequencies p of the
%           values in Y; 0 for an empty Y
if isempty(Y)
    result = 0;
    return;
end
tab = tabulate(Y);
prob = tab(:,3) / 100;      % third column of tabulate is a percentage
% Filter out zero-entries (0*log2(0) would be NaN)
prob = prob(prob~=0);
% Get entropy. The trailing semicolon matters: this is called once per
% candidate split, and without it every call echoed to the command window.
result = -sum(prob .* log2(prob));
6. cond_ent.mat
function result = cond_ent(Y, X)
% Calculates the conditional entropy of Y given X.
%   Y, X    vectors of equal length (one entry per observation)
%   result  sum over the distinct values v of X of P(X == v) * ent(Y(X == v))
result = 0;
tab = tabulate(X);
% Remove zero-entries (values tabulate lists with a count of 0)
tab = tab(tab(:,2)~=0,:);
for i = 1:size(tab,1)
    % Get entropy for y values where x is the current value
    H = ent(Y(X == tab(i,1)));
    % Get probability (tabulate reports a percentage)
    prob = tab(i, 3) / 100;
    % Combine
    result = result + prob * H;
end
7. build_tree.mat
function t = build_tree(X,Y,cols)
% Builds a decision tree to predict Y from X.  The tree is grown by
% recursively splitting each node using the feature which gives the best
% information gain until the leaf is consistent or all inputs have the same
% feature values.
%
% X is an nxm matrix, where n is the number of points and m is the
%   number of features.
% Y is an nx1 vector of classes
% cols is a cell-vector of labels for each feature
%
% RETURNS t, a structure with five entries:
%   t.p      vector with the index of each node's parent node (0 for root)
%   t.inds   the rows of X in each node (non-empty only for leaves)
%   t.labels cell of labels showing the decision that was made to get to a
%            node; entry k-1 belongs to node k (the root has no label)
%   t.bVal   cell of split thresholds, indexed like t.labels
%   t.coloum cell of split feature indices, indexed like t.labels

% Create an empty decision tree, which has one node and everything in it
inds = {1:size(X,1)};  % A cell per node containing indices of all data in that node
p = 0;                 % Vector containing the index of the parent node for each node
labels = {};           % A label for each node
bVal = {};             % Split threshold for each non-root node
coloum = {};           % Split feature index for each non-root node

% Create tree by splitting on the root
[inds, p, labels, bVal, coloum] = split_node(X, Y, inds, p, labels, cols, 1, bVal, coloum);
t.inds = inds;
t.p = p;
t.labels = labels;
t.bVal = bVal;
t.coloum = coloum;

function [inds, p, labels, bVal, coloum] = split_node(X, Y, inds, p, labels, cols, node, bVal, coloum)
% Recursively splits nodes based on information gain

% Check if the current leaf is consistent
if numel(unique(Y(inds{node}))) == 1
    return;
end

% Check if all inputs have the same features
% We do this by seeing if there are multiple unique rows of X
if size(unique(X(inds{node},:),'rows'),1) == 1
    return;
end

% Otherwise, we need to split the current node on some feature
best_ig = -inf;    % best information gain
best_feature = 0;  % best feature to split on
best_val = 0;      % best value to split the best feature on

curr_X = X(inds{node},:);
curr_Y = Y(inds{node});
H = ent(curr_Y);   % entropy before the split; same for every feature

% Loop over each feature
for i = 1:size(X,2)
    feat = curr_X(:,i);

    % Determine the values to split on
    vals = unique(feat);
    if numel(vals) < 2
        continue
    end
    % Candidate thresholds: midpoints between consecutive unique values.
    % This definition was missing from the listing ("splits" was used
    % below but never assigned).
    splits = 0.5*(vals(1:end-1) + vals(2:end));

    % Get binary values for each split value
    bin_mat = double(repmat(feat, [1 numel(splits)]) < repmat(splits', [numel(feat) 1]));

    % Compute the information gains
    H_cond = zeros(1, size(bin_mat,2));
    for j = 1:size(bin_mat,2)
        H_cond(j) = cond_ent(curr_Y, bin_mat(:,j));
    end
    IG = H - H_cond;

    % Find the best split
    [val, ind] = max(IG);
    if val > best_ig
        best_ig = val;
        best_feature = i;
        best_val = splits(ind);
    end
end

% Split the current node into two nodes: "<" child first, ">=" child second
feat = curr_X(:,best_feature);
feat = feat < best_val;
inds = [inds; {inds{node}(feat)}; {inds{node}(~feat)}];
inds{node} = [];
% Register both children under this node. This statement was missing from
% the listing; without it p never grows and n below is -1 at the root.
p = [p; node; node];
labels = [labels; {sprintf('%s < %2.2f', cols{best_feature}, best_val)}; ...
    {sprintf('%s >= %2.2f', cols{best_feature}, best_val)}];
bVal = [bVal; {best_val}; {best_val}];
coloum = [coloum; {best_feature}; {best_feature}];

% Recurse on newly-created nodes
n = numel(p)-2;
[inds, p, labels, bVal, coloum] = split_node(X, Y, inds, p, labels, cols, n+1, bVal, coloum);
[inds, p, labels, bVal, coloum] = split_node(X, Y, inds, p, labels, cols, n+2, bVal, coloum);
8. MainGUI.mat
function varargout = GUIAkhir(varargin)
% MAINGUI M-file for MainGUI.fig
%      MAINGUI, by itself, creates a new MAINGUI or raises the existing
%      singleton*.
%
%      H = MAINGUI returns the handle to a new MAINGUI or the handle to
%      the existing singleton*.
%
%      MAINGUI('CALLBACK',hObject,eventData,handles,...) calls the local
%      function named CALLBACK in MAINGUI.M with the given input arguments.
%
%      MAINGUI('Property','Value',...) creates a new MAINGUI or raises the
%      existing singleton*.  Starting from the left, property value pairs are
%      applied to the GUI before MainGUI_OpeningFcn gets called.
%
%      *See GUI Options on GUIDE's Tools menu.  Choose "GUI allows only one
%      instance to run (singleton)".
%
% See also: GUIDE, GUIDATA, GUIHANDLES

% Edit the above text to modify the response to help MainGUI

% Last Modified by GUIDE v2.5 31-Jan-2016 23:01:37

% Begin initialization code - DO NOT EDIT
gui_Singleton = 1;
gui_State = struct('gui_Name',       mfilename, ...
                   'gui_Singleton',  gui_Singleton, ...
                   'gui_OpeningFcn', @MainGUI_OpeningFcn, ...
                   'gui_OutputFcn',  @MainGUI_OutputFcn, ...
                   'gui_LayoutFcn',  [] , ...
                   'gui_Callback',   []);
% A leading string argument names the callback to dispatch to.
if nargin && ischar(varargin{1})
    gui_State.gui_Callback = str2func(varargin{1});
end

if nargout
    [varargout{1:nargout}] = gui_mainfcn(gui_State, varargin{:});
else
    gui_mainfcn(gui_State, varargin{:});
end
% --- Executes just before MainGUI is made visible.
function MainGUI_OpeningFcn(hObject, eventdata, handles, varargin)
% This function has no output args, see OutputFcn.
% hObject    handle to figure
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
% varargin   command line arguments to MainGUI (see VARARGIN)

% Choose default command line output for MainGUI
handles.output = hObject;

% Update handles structure
guidata(hObject, handles);

% UIWAIT makes MainGUI wait for user response (see UIRESUME)
% uiwait(handles.figure1);
% --- Outputs from this function are returned to the command line.
function varargout = MainGUI_OutputFcn(hObject, eventdata, handles)
% varargout  cell array for returning output args (see VARARGOUT);
% hObject    handle to figure
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)

% Get default command line output from handles structure
varargout{1} = handles.output;
function BtnProses_Callback(hObject, eventdata, handles)
% Pre-processing button: extracts features from every FASTA file of the
% three class folders, splits them into three folds and saves everything
% to 'hasilPre-processing.mat'.
% hObject    handle to BtnProses (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
clc;
warning off;
set(handles.axTotalLPC,'Visible','on');

% Text label for class 1, 2 and 3 respectively.
labelNames = {'normal', 'KPr', 'KPy'};

Hasilf = [];     % one feature row per FASTA file
classIds = [];   % numeric class (1..3) of each row of Hasilf

for n = 1:3
    % NOTE(review): the listing breaks this literal across two lines; the
    % folder is assumed to be named 'class <n>' (with a space), matching
    % the file names below - confirm.
    folder = ['C:\Users\quadran\Documents\MATLAB\decision tree\class ',num2str(n),'\'];
    j = 1;
    file = [folder,'class ',num2str(n),' (',num2str(j),').fasta'];
    % Files are numbered consecutively; stop at the first missing number.
    while exist(file, 'file') == 2
        [f,databaru] = preprosses(file);
        axes(handles.axTotalLPC)
        stem(databaru,'DisplayName','databaru');
        pause(0.01);
        Hasilf = [Hasilf; f'];
        classIds(end+1,1) = n;
        j = j+1;
        file = [folder,'class ',num2str(n),' (',num2str(j),').fasta'];
    end
end

% Replace NaN features by 0.
Hasilf(isnan(Hasilf)) = 0;

% Derive the text labels from the files actually read, so classAwal always
% has one entry per row of Hasilf (the original hard-coded the row ranges
% 1:37, 38:292 and 293:416).
classAwal = reshape(labelNames(classIds), [], 1);

% First column is the class, remaining columns are the features.
Hasilf = [classIds,Hasilf];
[Training1, Testing1, Training2, Testing2, Training3, Testing3] = foldSaya( Hasilf );
save('hasilPre-processing.mat','Hasilf','classAwal' , 'Training1', 'Testing1', ...
    'Training2', 'Testing2', 'Training3', 'Testing3');
uiwait(msgbox('Pre-processing sudah selesai.'));
% --- Executes on button press in BtnAkurasi.
function BtnAkurasi_Callback(hObject, eventdata, handles)
% Classifies the third test fold with the saved tree, stores the accuracy
% in 'acc.mat' and shows it (rounded, with a percent sign) in HasilAkurasi.
% hObject    handle to BtnAkurasi (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
folds = load('hasilPre-processing.mat');
model = load('tree.mat','t');

testFeatures = folds.Testing3(:, 2:end);
trainClasses = folds.Training3(:, 1);
predicted = testingTree(testFeatures, model.t, trainClasses)';

% Column 1: true class, column 2: predicted class.
result = [folds.Testing3(:, 1), predicted];
acc = CalculateAcc(result);
save('acc.mat', 'acc');

accText = strcat(int2str(acc), '%');
set(handles.HasilAkurasi, 'String', accText);
function class = testingTree(att,tree,X)
% Classifies each row of att by walking it down a tree from build_tree.
%   att    matrix with one sample per row and one feature per column
%   tree   struct with fields p, inds, bVal and coloum as built by build_tree
%   X      class of every training row the tree was built from
%   class  row vector with one predicted class per row of att
class = zeros(1, size(att, 1));
for i = 1 : size(att, 1)
    curr = 1;                        % start at the root
    idx = find(tree.p == curr);      % children of the current node
    % Inner nodes have empty inds; descend until a leaf is reached.
    while isempty(tree.inds{curr})
        % Split data of node k is stored at position k-1 (the root has no
        % entry); both children carry the same feature and threshold, and
        % the first child is the "<" branch. This condition was missing
        % from the listing, which left an "else" without an "if".
        if att(i, tree.coloum{idx(1)-1}) < tree.bVal{idx(1)-1}
            curr = idx(1);
        else
            curr = idx(2);
        end
        idx = find(tree.p == curr);
    end
    % Match the data: classes of the training rows that ended in this leaf
    val = X(tree.inds{curr});
    % mode equals the single class for a consistent leaf and still yields
    % one value for an inconsistent one (unique(val) returned several
    % values there and broke the output length).
    class(i) = mode(val);
end
function acc = CalculateAcc(result)
% Percentage of rows of result whose first and second column are equal.
%   result  matrix with the true class in column 1 and the predicted
%           class in column 2
%   acc     matches * 100 / number of rows (NaN when result has no rows)
matches = sum(result(:, 1) == result(:, 2));
rowCount = size(result, 1);
acc = matches * 100 / rowCount;
% --- Executes on button press in BtnTree.
function BtnTree_Callback(hObject, eventdata, handles)
% Builds a decision tree from the third training fold, draws it in a new
% figure and saves it to 'tree.mat'.
% hObject    handle to BtnTree (see GCBO)
% handles    structure with handles and user data (see GUIDATA)

%% Load the training data
S = load('hasilPre-processing.mat', 'Training3');
M = S.Training3;
% We want to predict the first column...
Y = M(:,1);
% ...based on the others
X = M(:,2:end);

% One label per feature column: 'att1', 'att2', ... The original listed 27
% names by hand and had to be edited for every other LPC order.
cols = arrayfun(@(k) sprintf('att%d', k), 1:size(X,2), 'UniformOutput', false);

t = build_tree(X,Y,cols);

figure;
%% Display the tree
treeplot(t.p');
title('Decision tree (Visual Tree)');
[xs,ys] = treelayout(t.p');

for i = 2:numel(t.p)
    % Get my coordinate
    my_x = xs(i);
    my_y = ys(i);

    % Get parent coordinate
    parent_x = xs(t.p(i));
    parent_y = ys(t.p(i));

    % Calculate weight coordinate (midpoint)
    mid_x = (my_x + parent_x)/2;
    mid_y = (my_y + parent_y)/2;

    % Edge label (labels are stored one position before their node)
    text(mid_x,mid_y,t.labels{i-1});

    % Leaf label: only inconsistent leaves are annotated, as in the
    % listing, whose branch for consistent leaves is empty.
    if ~isempty(t.inds{i})
        val = Y(t.inds{i});
        if numel(unique(val)) ~= 1
            % inconsistent data
            text(my_x, my_y, sprintf('**y=%2.2f\nn=%d', mode(val), ...
                numel(val)));
        end
    end
end
save ('tree.mat','t');
% --- Executes on button press in UTbutton.
function UTbutton_Callback(hObject, eventdata, handles)
% Single-sample test: classifies the row of the third test fold whose index
% the user typed into IndexInputUT, saves the result to 'UT.mat' and shows
% the class and the feature values in the GUI.
% hObject    handle to UTbutton (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
folds = load('hasilPre-processing.mat');
model = load('tree.mat');

index = str2double(get(handles.IndexInputUT,'String'));

% The index comes straight from a text box: reject anything that is not a
% valid row number instead of failing with an indexing error.
rowCount = size(folds.Testing3, 1);
if isnan(index) || index ~= floor(index) || index < 1 || index > rowCount
    errordlg(sprintf('Indeks harus bilangan bulat antara 1 dan %d.', rowCount), ...
        'Indeks tidak valid');
    return;
end

dataIndex = folds.Testing3(index, 2:end);
hasilclass = testingTree(dataIndex, model.t, folds.Training3(:, 1))';
save('UT.mat', 'hasilclass');

% Disabled code kept from the original: it printed a class name instead of
% the class number.
% a = hasilclass;
% if a == 1
%     fprintf('Normal' );
% elseif( a == 2)
%     fprintf('Kanker Paru - paru)' );
% elseif a == 3
%     fprintf('kanker payudara' );
% else
%     fprintf('maaf anda salah');
% end

set(handles.HasilUT, 'String', strcat(' ', int2str(hasilclass)));
set(handles.UTData,'Data',dataIndex);
function IndexInputUT_Callback(hObject, eventdata, handles)
% Callback of the IndexInputUT edit box. The body is intentionally empty:
% no action is taken here when the text changes.
% hObject    handle to IndexInputUT (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)

% Hints: get(hObject,'String') returns contents of IndexInputUT as text
%        str2double(get(hObject,'String')) returns contents of IndexInputUT as a double
% --- Executes during object creation, after setting all properties.
function IndexInputUT_CreateFcn(hObject, eventdata, handles) % hObject handle to IndexInputUT (see GCBO)
% eventdata reserved - to be defined in a future version of MATLAB % handles empty - handles not created until after all CreateFcns called
% Hint: edit controls usually have a white background on Windows. % See ISPC and COMPUTER.
if ispc && isequal(get(hObject,'BackgroundColor'),
get(0,'defaultUicontrolBackgroundColor'))
set(hObject,'BackgroundColor','white');