• Tidak ada hasil yang ditemukan

Proses ini berfungsi untuk membaca dokumen sekaligus mengolahnya menjadi data yang siap diklasifikasikan.

4.2.1 Class Master

Berfungsi untuk menyimpan dokumen yang sudah diproses melalui tahap pencarian kata dasar (stemming) dan penghapusan kata yang tidak diperlukan (stopword).

public class Master { List<Kelas> classes; List<Dokumen> documents; String stem, stopword; Set<String> terms; Dokumen testDoc;

/**
 * Returns the document held in the {@code testDoc} field.
 *
 * @return the current value of {@code testDoc}
 */
public Dokumen getTestDoc() {
    return this.testDoc;
}

public void setTestDoc(Dokumen testDoc) { this.testDoc = testDoc; d : documents) { d.setFileKamusStemWord(stem); } }

/**
 * Returns the value of the {@code stopword} field.
 *
 * <p>NOTE(review): an identical {@code getStopword()} definition appears again further down
 * in this listing; only one of the two can remain in a compilable class.
 *
 * @return the current value of {@code stopword}
 */
public String getStopword() {
    return this.stopword;
}

/**
 * Returns the value of the {@code stem} field.
 *
 * @return the current value of {@code stem}
 */
public String getStem() {
    return this.stem;
}

/**
 * Stores {@code stem} and forwards it to every document in {@code documents}.
 *
 * @param stem value assigned to the {@code stem} field and passed to
 *             {@code setFileKamusStemWord} on each document
 */
public void setStem(String stem) {
    this.stem = stem;
    final int total = documents.size();
    for (int index = 0; index < total; index++) {
        documents.get(index).setFileKamusStemWord(stem);
    }
}

/**
 * Returns the value of the {@code stopword} field.
 *
 * <p>NOTE(review): an identical {@code getStopword()} definition appears earlier in this
 * listing; only one of the two can remain in a compilable class.
 *
 * @return the current value of {@code stopword}
 */
public String getStopword() {
    return this.stopword;
}

/**
 * Stores {@code stopword} and forwards it to every document in {@code documents}.
 *
 * @param stopword value assigned to the {@code stopword} field and passed to
 *                 {@code setFileKamusStopWord} on each document
 */
public void setStopword(String stopword) {
    this.stopword = stopword;
    final int total = documents.size();
    for (int index = 0; index < total; index++) {
        documents.get(index).setFileKamusStopWord(stopword);
    }
}

/**
 * Preprocesses every document and collects the distinct words of all documents.
 *
 * <p>{@code Preprocessing()} is invoked on each document in list order; afterwards all
 * entries of that document's {@code daftarKata} are added to a freshly created
 * {@code terms} set.
 */
public void preprocess() {
    terms = new HashSet<>();
    for (Dokumen d : documents) {
        d.Preprocessing();
        terms.addAll(d.daftarKata);
    }
    // No term-frequency table is built here: the earlier local tally (one filter pass over
    // every document for every term) was never stored or returned, so it only cost time.
}

public Master(String path) {

File[] files = new File(path).listFiles(); documents = new ArrayList<>(files.length); classes = new ArrayList<>(files.length); for (File file : files) {

if (file.isDirectory()) { Kelas kelas = new Kelas(); kelas.setNama(file.getName()); classes.add(kelas);

4.2.2 Class Kelas

Memberi nama kelas pada setiap dokumen yang diambil sesuai dengan labelnya.

File[] files = new File(path).listFiles(); documents = new ArrayList<>(files.length); classes = new ArrayList<>(files.length); for (File file : files) {

if (file.isDirectory()) { Kelas kelas = new Kelas(); kelas.setNama(file.getName()); classes.add(kelas);

File[] files2 = file.listFiles(); for (File file1 : files2) {

Dokumen doc = new Dokumen(file1); kelas.addDokumen(doc); documents.add(doc); } } else { documents.add(new Dokumen(file)); } } }

/**
 * Looks up the class whose document list contains the given document.
 *
 * @param d the document to search for
 * @return the first entry of {@code classes} whose {@code getDaftarDokumen()} contains
 *         {@code d}, or {@code null} when no entry does
 */
public Kelas getKelas(Dokumen d) {
    Kelas found = null;
    for (int index = 0; index < classes.size() && found == null; index++) {
        Kelas candidate = classes.get(index);
        if (candidate.getDaftarDokumen().contains(d)) {
            found = candidate;
        }
    }
    return found;
}
} // end of class Master

List Code 4.1 Class Master

public class Kelas { private String nama;

/** Documents registered with this class through {@code addDokumen}. */
private List<Dokumen> daftarDokumen;

/** Creates a class with an empty document list. */
public Kelas() {
    this.daftarDokumen = new ArrayList<>();
}

/**
 * Sets the name of this class.
 *
 * @param name value assigned to the {@code nama} field
 */
public void setNama(String name) {
    this.nama = name;
}

/**
 * Returns the document list of this class.
 *
 * @return the {@code daftarDokumen} field itself (not a copy)
 */
public List<Dokumen> getDaftarDokumen() {
    return this.daftarDokumen;
}

/**
 * Appends a document to this class's document list.
 *
 * @param dokumen the document to add to {@code daftarDokumen}
 */
public void addDokumen(Dokumen dokumen) {
    this.daftarDokumen.add(dokumen);
}

4.2.3 Class Dokumen

/**
 * Appends a document to this class's document list.
 *
 * <p>NOTE(review): an identical {@code addDokumen} definition appears just before the
 * section heading above; only one of the two can remain in a compilable class.
 *
 * @param dokumen the document to add to {@code daftarDokumen}
 */
public void addDokumen(Dokumen dokumen) {
    this.daftarDokumen.add(dokumen);
}

/**
 * Returns the name of this class.
 *
 * @return the current value of the {@code nama} field
 */
public String getNama() {
    return this.nama;
}
} // end of class Kelas

List Code 4.2 Class Kelas

public class Dokumen {

List<String> kamusStopWord; Set<String> kamusKataDasar; String filename;

List<String> daftarKata;

private boolean tokenized, removedStopWords, stemmed; private String fileKamusStopWord, fileKamusStemWord; public String getFileKamusStopWord() {

return fileKamusStopWord; }

/**
 * Sets the stopword-dictionary path.
 *
 * @param fileKamusStopWord value assigned to the field of the same name
 */
public void setFileKamusStopWord(String fileKamusStopWord) {
    String newPath = fileKamusStopWord;
    this.fileKamusStopWord = newPath;
}

/**
 * Returns the configured base-word dictionary path.
 *
 * @return the current value of {@code fileKamusStemWord}
 */
public String getFileKamusStemWord() {
    return this.fileKamusStemWord;
}

/**
 * Sets the base-word dictionary path.
 *
 * @param fileKamusStemWord value assigned to the field of the same name
 */
public void setFileKamusStemWord(String fileKamusStemWord) {
    String newPath = fileKamusStemWord;
    this.fileKamusStemWord = newPath;
}

/**
 * Returns an iterator over the words of this document.
 *
 * @return the iterator obtained from {@code daftarKata}
 */
public Iterator<String> iterator() {
    Iterator<String> words = this.daftarKata.iterator();
    return words;
}

/**
 * Returns the number of words currently held by this document.
 *
 * @return the size of {@code daftarKata}
 */
public int size() {
    int wordCount = this.daftarKata.size();
    return wordCount;
}

/**
 * Creates a document for the given file by delegating to {@code Dokumen(String)} with the
 * file's path.
 *
 * @param file the file whose {@code getPath()} value becomes this document's filename
 */
public Dokumen(File file) {
    this(file.getPath());
}

/**
 * Creates a document for the file at {@code path} with an empty word list.
 * The file itself is not read here.
 *
 * @param path value stored in the {@code filename} field
 */
public Dokumen(String path) {
    this.filename = path;
    this.daftarKata = new ArrayList<>();
}

public void Tokenize() { if (tokenized) {

/**
 * Creates a document for the file at {@code path} with an empty word list.
 * The file itself is not read here.
 *
 * <p>NOTE(review): this constructor is a repeat of the identical definition a few lines
 * above; only one of the two can remain in a compilable class.
 *
 * @param path value stored in the {@code filename} field
 */
public Dokumen(String path) {
    this.filename = path;
    this.daftarKata = new ArrayList<>();
}

/**
 * Reads the file named by {@code filename} and fills {@code daftarKata} with its tokens.
 *
 * <p>Does nothing when the document is already tokenized. Tokens are taken from a
 * {@link Scanner} with its default delimiter, and each one is passed through a regex
 * replacement before being appended. On success {@code tokenized} becomes {@code true};
 * if the file cannot be opened the error is logged and {@code tokenized} is set to
 * {@code false}.
 */
public void Tokenize() {
    if (tokenized) {
        return;
    }
    // try-with-resources: closing the Scanner also closes the wrapped file stream,
    // which was previously left open.
    try (Scanner s = new Scanner(new FileInputStream(filename))) {
        if (daftarKata == null) {
            daftarKata = new ArrayList<>();
        } else {
            daftarKata.clear();
        }
        while (s.hasNext()) {
            String text = s.next();
            // Presumably intended to keep only ASCII letters and '-'.
            // NOTE(review): confirm how this negated character-class intersection is
            // evaluated on the target JDK. The result is appended unconditionally, so a
            // token that is stripped completely still ends up in the list as "".
            String kata = text.replaceAll("[^a-zA-Z&&[^\\-]]", "");
            daftarKata.add(kata);
        }
        tokenized = true;
    } catch (FileNotFoundException ex) {
        tokenized = false;
        Logger.getLogger(Dokumen.class.getName()).log(Level.SEVERE, null, ex);
    }
}

/**
 * Loads the base-word dictionary from a file into {@code kamusKataDasar}.
 *
 * <p>Every whitespace-delimited token of the file becomes one entry of a new
 * {@link HashSet}. If the file cannot be opened the error is logged and
 * {@code kamusKataDasar} keeps its previous value.
 *
 * @param path path of the dictionary file to read
 */
public void bacakamus(String path) {
    // try-with-resources: closing the Scanner also closes the wrapped file stream,
    // which was previously left open.
    try (Scanner s = new Scanner(new FileInputStream(path))) {
        kamusKataDasar = new HashSet<>();
        while (s.hasNext()) {
            kamusKataDasar.add(s.next());
        }
    } catch (FileNotFoundException ex) {
        // NOTE(review): the logger is named after Tampilstopword although this method
        // lives in Dokumen; kept unchanged so existing log configuration still applies.
        Logger.getLogger(Tampilstopword.class.getName()).log(Level.SEVERE, null, ex);
    }
}

/**
 * Loads the stopword dictionary from a file into {@code kamusStopWord}.
 *
 * <p>Every whitespace-delimited token of the file becomes one entry of a new
 * {@link ArrayList}. If the file cannot be opened the error is logged and
 * {@code kamusStopWord} keeps its previous value.
 *
 * @param filename path of the stopword file to read (shadows the {@code filename} field)
 */
public void bacakamusstopword(String filename) {
    // try-with-resources: closing the Scanner also closes the wrapped file stream.
    try (Scanner sc = new Scanner(new FileInputStream(filename))) {
        kamusStopWord = new ArrayList<>();
        while (sc.hasNext()) {
            kamusStopWord.add(sc.next());
        }
    } catch (FileNotFoundException ex) {
        Logger.getLogger(Tampilstopword.class.getName()).log(Level.SEVERE, null, ex);
    }
}

/**
 * Removes every word of {@code daftarKata} that occurs in {@code kamusStopWord}.
 *
 * <p>Does nothing when stopwords were already removed. Sets {@code removedStopWords}
 * to {@code true} on completion.
 *
 * @throws IllegalStateException if {@code kamusStopWord} is {@code null} or the document
 *                               has not been tokenized yet
 */
public void hapusstopword() {
    if (removedStopWords) {
        return;
    }
    if (kamusStopWord == null) {
        throw new IllegalStateException("Kamus stopword belum ada");
    }
    if (!tokenized) {
        throw new IllegalStateException("Dokumen belum di-tokenize");
    }
    // Copy the dictionary into a hash set once so each word costs one hash lookup
    // instead of a linear scan of the stopword list.
    daftarKata.removeAll(new HashSet<>(kamusStopWord));
    removedStopWords = true;
}

/**
 * Replaces every word of {@code daftarKata} with the result of lemmatizing it against
 * {@code kamusKataDasar}.
 *
 * <p>Does nothing when the document is already stemmed. Sets {@code stemmed} to
 * {@code true} on completion.
 *
 * @throws IllegalStateException if {@code kamusKataDasar} is {@code null} or empty, or if
 *                               stopwords have not been removed yet
 */
public void stem() {
    if (stemmed) {
        return;
    }
    final boolean dictionaryMissing = kamusKataDasar == null || kamusKataDasar.isEmpty();
    if (dictionaryMissing) {
        throw new IllegalStateException("Kamus kata dasar belum ada");
    }
    if (!removedStopWords) {
        throw new IllegalStateException("Stopwords belum dibuang");
    }

    final Lemmatizer lemmatizer = new DefaultLemmatizer(kamusKataDasar);
    int position = 0;
    while (position < daftarKata.size()) {
        String lemma = lemmatizer.lemmatize(daftarKata.get(position));
        daftarKata.set(position, lemma);
        position++;
    }
    stemmed = true;
}

/**
 * Runs the full preprocessing pipeline on this document, in this fixed order:
 * tokenize, load the stopword dictionary, remove stopwords, load the base-word
 * dictionary, stem.
 */
public void Preprocessing() {
    this.Tokenize();
    this.bacakamusstopword(this.fileKamusStopWord);
    this.hapusstopword();
    this.bacakamus(this.fileKamusStemWord);
    this.stem();
}
} // end of class Dokumen

4.2.4 Class Stopword

Berfungsi untuk memanggil kamus stopword bila belum tersedia sekaligus menjalankan tahapan penghapusan stopword.

bacakamusstopword(fileKamusStopWord); hapusstopword(); bacakamus(fileKamusStemWord); stem(); } }

List Code 4.3 Class Dokumen

public class Tampilstopword {

/**
 * Interactive demo: asks for a text document, a stopword file and a base-word file
 * (three consecutive open dialogs), preprocesses the document and prints its word list.
 * Cancelling any dialog ends the program without output.
 *
 * @param args unused
 */
public static void main(String[] args) {
    JFileChooser chooser = new JFileChooser();
    FileFilter txtFilter = new FileNameExtensionFilter("Text files (*.txt)", "txt");
    chooser.addChoosableFileFilter(txtFilter);
    chooser.setFileFilter(txtFilter);

    // First dialog: the document to process.
    if (chooser.showOpenDialog(null) != JFileChooser.APPROVE_OPTION) {
        return;
    }
    Dokumen dok = new Dokumen(chooser.getSelectedFile());

    // Second dialog: the stopword dictionary.
    if (chooser.showOpenDialog(null) != JFileChooser.APPROVE_OPTION) {
        return;
    }
    dok.setFileKamusStopWord(chooser.getSelectedFile().getPath());

    // Third dialog: the base-word dictionary.
    if (chooser.showOpenDialog(null) != JFileChooser.APPROVE_OPTION) {
        return;
    }
    dok.setFileKamusStemWord(chooser.getSelectedFile().getPath());

    dok.Preprocessing();
    System.out.println(dok.daftarKata);
}

/**
 * Lower-cases a text and splits it into tokens on spaces and punctuation.
 *
 * <p>The delimiter set is the literal character list passed to {@link StringTokenizer};
 * empty tokens are never produced.
 *
 * @param Teks the text to split; must not be {@code null}
 * @return a new mutable list with the tokens in order of appearance
 */
public static List<String> Tokenize(String Teks) {
    // Locale.ROOT keeps lower-casing independent of the machine's default locale.
    StringTokenizer token = new StringTokenizer(
            Teks.toLowerCase(java.util.Locale.ROOT), " .,()?!-_+:;/*&^%$#@!~[]{}=()");
    // Collect with the typed API instead of a raw Enumeration cast.
    List<String> hasil = new ArrayList<>();
    while (token.hasMoreTokens()) {
        hasil.add(token.nextToken());
    }
    return hasil;
}

public static List<String> bacakamusstopword(String filename) { try {

FileInputStream FIS = new FileInputStream(filename); Scanner sc = new Scanner(FIS);

List<String> hasil = new ArrayList<>(); while (sc.hasNext()) {

try {

FileInputStream FIS = new FileInputStream(filename); Scanner sc = new Scanner(FIS);

List<String> hasil = new ArrayList<>(); while (sc.hasNext()) {

hasil.add(sc.next()); }

return hasil;

} catch (FileNotFoundException ex) {

Logger.getLogger(Tampilstopword.class.getName()).log(Level.SEV ERE, null, ex);

return null; }

}

/**
 * Returns a copy of a token list with all stopwords removed.
 *
 * @param hasiltoken the tokens to filter; not modified
 * @param stopwords  the words to drop; {@code null} or empty means nothing is removed
 * @return a new list with the tokens of {@code hasiltoken}, in order, that do not occur
 *         in {@code stopwords}
 */
public static List<String> hapusstopword(List<String> hasiltoken, List<String> stopwords) {
    List<String> hasilstopword = new ArrayList<>(hasiltoken);
    if (stopwords != null && !stopwords.isEmpty()) {
        // One hash lookup per token instead of a linear scan of the stopword list.
        hasilstopword.removeAll(new HashSet<>(stopwords));
    }
    return hasilstopword;
}

public static Set<String> bacakamus() { JFileChooser path = new JFileChooser(); int result = path.showOpenDialog(null);

if (result == JFileChooser.APPROVE_OPTION) { File filename = path.getSelectedFile();

FileInputStream fis = null; try {

fis = new FileInputStream(filename); } catch (FileNotFoundException ex) {

Logger.getLogger(Tampilstopword.class.getName()).log(Level.SEV ERE, null, ex);

}

Scanner s = new Scanner(fis);

Set<String> hasil = new HashSet<>(); while (s.hasNext()) { hasil.add(s.next()); } return hasil; } return null; }

public static List<String> stem(Dokumen Dokumen, Set<String> kamus) {

4.2.5 Class Mencari Bobot

Pemrosesan untuk menentukan bobot setiap term yang didapat.

}

return hasil; }

return null; }

/**
 * Lemmatizes every word of a document against a dictionary without modifying the document.
 *
 * @param Dokumen the document whose {@code daftarKata} is read
 * @param kamus   the dictionary handed to {@code DefaultLemmatizer}
 * @return a new list with one lemmatized entry per word, in the original order
 */
public static List<String> stem(Dokumen Dokumen, Set<String> kamus) {
    final Lemmatizer lemmatizer = new DefaultLemmatizer(kamus);
    final List<String> lemmas = new ArrayList<>(Dokumen.size());
    final List<String> words = Dokumen.daftarKata;
    for (int index = 0; index < words.size(); index++) {
        lemmas.add(lemmatizer.lemmatize(words.get(index)));
    }
    return lemmas;
}

}

List Code 4.4 Class Tampil stopword

public class Mencari_bobot {

/**
 * Computes the dot product of two vectors of equal length.
 *
 * @param i first vector
 * @param j second vector
 * @return the sum of {@code i[k] * j[k]} over all indices; {@code 0} for empty vectors
 * @throws IllegalArgumentException if the two arrays differ in length
 */
public static double dotproduct(double[] i, double[] j) {
    if (i.length != j.length) {
        throw new IllegalArgumentException(
                "Vector lengths differ: " + i.length + " vs " + j.length);
    }
    double hasil = 0;
    for (int k = 0; k < i.length; k++) {
        hasil += i[k] * j[k];
    }
    return hasil;
}

/**
 * Key type pairing a term with a document, used for the term-frequency cache.
 *
 * <p>Two pairs are equal when their terms are equal and their documents are equal
 * according to the document's own {@code equals}. Without value-based
 * {@code equals}/{@code hashCode} a freshly built key never matches a stored one, so map
 * lookups always miss.
 *
 * <p>The fields stay public and mutable for compatibility; do not modify a pair while it
 * is used as a map key.
 */
static class TermDocumentPair {
    public String term;

    public Dokumen doc;

    TermDocumentPair(String term, Dokumen doc) {
        this.term = term;
        this.doc = doc;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof TermDocumentPair)) {
            return false;
        }
        TermDocumentPair other = (TermDocumentPair) obj;
        boolean sameTerm = term == null ? other.term == null : term.equals(other.term);
        boolean sameDoc = doc == null ? other.doc == null : doc.equals(other.doc);
        return sameTerm && sameDoc;
    }

    @Override
    public int hashCode() {
        int result = term == null ? 0 : term.hashCode();
        return 31 * result + (doc == null ? 0 : doc.hashCode());
    }
}

private Set<String> allTerms;

private Map<TermDocumentPair, Integer> tableTF; private Map<String, Integer> tableDF;

/** Source of the document list used for document-frequency counting. */
private Master master;
/** Document that {@code getInnerProduct} compares other documents against. */
private Dokumen docTest;

/**
 * Returns the test document.
 *
 * @return the current value of {@code docTest}
 */
public Dokumen getDocTest() {
    return this.docTest;
}

public void setDocTest(Dokumen docTest) { this.docTest = docTest;

/**
 * Document that {@code getInnerProduct} compares other documents against.
 *
 * <p>NOTE(review): this field and the getter below repeat declarations that already
 * appear a few lines above; only one copy can remain in a compilable class.
 */
private Dokumen docTest;

/**
 * Returns the test document.
 *
 * @return the current value of {@code docTest}
 */
public Dokumen getDocTest() {
    return this.docTest;
}

/**
 * Sets the test document.
 *
 * @param docTest value assigned to the {@code docTest} field
 */
public void setDocTest(Dokumen docTest) {
    Dokumen replacement = docTest;
    this.docTest = replacement;
}

/**
 * Creates a weight calculator over the given master.
 *
 * <p>Takes over the master's term set and test document and creates two empty cache maps,
 * both with an initial capacity of (number of terms × number of documents).
 *
 * @param m the master whose {@code terms}, {@code documents} and test document are used
 */
public Mencari_bobot(Master m) {
    this.master = m;
    this.allTerms = m.terms;

    final int capacity = this.allTerms.size() * m.documents.size();
    this.tableTF = new HashMap<>(capacity);
    this.tableDF = new HashMap<>(capacity);

    this.docTest = m.getTestDoc();
}

/**
 * Counts how often a term occurs in a document, caching the result in {@code tableTF}.
 *
 * @param term the term to count
 * @param doc  the document whose {@code daftarKata} is scanned
 * @return the number of entries of {@code doc.daftarKata} equal to {@code term}
 */
public int getTermFrequency(String term, Dokumen doc) {
    final TermDocumentPair key = new TermDocumentPair(term, doc);
    final Integer cached = tableTF.get(key);
    if (cached != null) {
        return cached;
    }

    int occurrences = 0;
    for (int index = 0; index < doc.daftarKata.size(); index++) {
        if (doc.daftarKata.get(index).equals(term)) {
            occurrences++;
        }
    }
    tableTF.put(key, occurrences);
    return occurrences;
}

/**
 * Counts the documents of the master that contain a term, caching the result in
 * {@code tableDF}.
 *
 * @param term the term to look for
 * @return the number of documents whose {@code daftarKata} contains {@code term}
 */
public int getDocumentFrequency(String term) {
    final Integer cached = tableDF.get(term);
    if (cached != null) {
        return cached;
    }

    int containing = 0;
    for (int index = 0; index < master.documents.size(); index++) {
        if (master.documents.get(index).daftarKata.contains(term)) {
            containing++;
        }
    }
    tableDF.put(term, containing);
    return containing;
} // idf

/**
 * Inverse document frequency, variant 1: {@code ln(N / df)} with N the number of
 * documents in the master and df the term's document frequency.
 *
 * @param term the term to weigh
 * @return the natural logarithm of the real-valued ratio N / df
 * @throws ArithmeticException if no document contains the term (df is 0)
 */
private double idf1(String term) {
    final int df = getDocumentFrequency(term);
    if (df == 0) {
        // Keeps the failure mode of the former integer division by zero.
        throw new ArithmeticException("Document frequency is 0 for term: " + term);
    }
    // Divide as doubles: integer division truncated the ratio before the logarithm
    // (for example 5 / 2 became 2).
    return Math.log((double) master.documents.size() / df);
}

/**
 * Inverse document frequency, variant 2: the reciprocal of the term's document frequency.
 *
 * @param term the term to weigh
 * @return {@code 1 / df} as a double
 */
private double idf2(String term) {
    final double documentFrequency = getDocumentFrequency(term);
    return 1d / documentFrequency;
}

/**
 * Inverse document frequency, variant 1: {@code ln(N / df)} with N the number of
 * documents in the master and df the term's document frequency.
 *
 * <p>NOTE(review): this method repeats the {@code idf1} definition that appears a few
 * lines above; only one copy can remain in a compilable class.
 *
 * @param term the term to weigh
 * @return the natural logarithm of the real-valued ratio N / df
 * @throws ArithmeticException if no document contains the term (df is 0)
 */
private double idf1(String term) {
    final int df = getDocumentFrequency(term);
    if (df == 0) {
        // Keeps the failure mode of the former integer division by zero.
        throw new ArithmeticException("Document frequency is 0 for term: " + term);
    }
    // Divide as doubles: integer division truncated the ratio before the logarithm
    // (for example 5 / 2 became 2).
    return Math.log((double) master.documents.size() / df);
}

/**
 * Inverse document frequency, variant 2: the reciprocal of the term's document frequency.
 *
 * <p>NOTE(review): this method repeats the {@code idf2} definition that appears a few
 * lines above; only one copy can remain in a compilable class.
 *
 * @param term the term to weigh
 * @return {@code 1 / df} as a double
 */
private double idf2(String term) {
    final double documentFrequency = getDocumentFrequency(term);
    return 1d / documentFrequency;
}

/**
 * Inverse document frequency, variant 3: {@code ln(1000 / df)}.
 *
 * <p>NOTE(review): the constant 1000 is hard-coded; presumably a fixed corpus size —
 * confirm against the method description.
 *
 * @param term the term to weigh
 * @return the natural logarithm of the real-valued ratio 1000 / df
 * @throws ArithmeticException if no document contains the term (df is 0)
 */
private double idf3(String term) {
    final int df = getDocumentFrequency(term);
    if (df == 0) {
        // Keeps the failure mode of the former integer division by zero.
        throw new ArithmeticException("Document frequency is 0 for term: " + term);
    }
    // Divide as doubles: integer division truncated the ratio before the logarithm.
    return Math.log(1000.0 / df);
}

/**
 * Computes the inverse document frequency of a term with the selected variant.
 *
 * @param term   the term to weigh
 * @param metode 1, 2 or 3, selecting {@code idf1}, {@code idf2} or {@code idf3}
 * @return the value returned by the selected variant
 * @throws IllegalArgumentException if {@code metode} is not 1, 2 or 3
 */
public double getInverseDocumentFrequency(String term, int metode) {
    if (metode == 1) {
        return idf1(term);
    }
    if (metode == 2) {
        return idf2(term);
    }
    if (metode == 3) {
        return idf3(term);
    }
    throw new IllegalArgumentException();
} // weight

/**
 * Computes the weight of a term in a document as term frequency times inverse document
 * frequency (variant 1).
 *
 * @param term the term to weigh
 * @param doc  the document in which the term frequency is counted
 * @return {@code getTermFrequency(term, doc) * getInverseDocumentFrequency(term, 1)}
 */
public double getWeight(String term, Dokumen doc) {
    final int termFrequency = getTermFrequency(term, doc);
    final double inverseDocumentFrequency = getInverseDocumentFrequency(term, 1);
    return termFrequency * inverseDocumentFrequency;
}

/**
 * Multiplies a term's weight in the given document by its weight in the test document.
 *
 * @param term the term to weigh
 * @param doc  the document compared against {@code docTest}
 * @return {@code getWeight(term, doc) * getWeight(term, docTest)}
 */
public double getInnerProduct(String term, Dokumen doc) {
    final double weightInDoc = getWeight(term, doc);
    final double weightInTest = getWeight(term, docTest);
    return weightInDoc * weightInTest;
}

Untuk memanggil semua sistem yang sudah dibuat sehingga dokumen dapat diproses dengan semestinya.

Dokumen terkait