LAMPIRAN A: Kode Program
1.
getrss.php
<?php
// Pull in the processing step selected by the "load" query parameter.
// A missing or unrecognised value includes nothing.
switch (@$_GET["load"]) {
    case "stemming":
        include("step2a_stemming.php");
        break;
    case "bobot":
        include("step3_bobot_dan_normalisasi.php");
        break;
    case "relevansi":
        include("step5_tabel_relevansi.php");
        break;
}
?>
<?php
/**
 * Loads the RSS feed named by the "q" query parameter, prints the channel and
 * its items as HTML, and inserts into tabel_konten_temp every item whose title
 * is not stored yet and whose tag-stripped description is longer than 200
 * bytes.
 *
 * Query parameters read: "q" (feed location) and "kategori" (stored with each
 * inserted row).
 */
set_time_limit(300);
error_reporting(0);
include 'koneksi.php';

if (!function_exists('rss_teks_anak')) {
    /**
     * Returns the value of the first child node of the first <$tag> element
     * found below $induk, or '' when the element is missing or has no children.
     */
    function rss_teks_anak($induk, $tag)
    {
        $elemen = $induk->getElementsByTagName($tag)->item(0);
        if ($elemen === null || $elemen->firstChild === null) {
            return '';
        }
        return $elemen->firstChild->nodeValue;
    }
}

// NOTE(review): the feed location is taken straight from the request, so this
// page will try to load whatever URL or path the caller supplies. Restrict it
// to a list of known feeds if the page is reachable by untrusted users.
$q = (isset($_GET["q"]) && is_string($_GET["q"])) ? $_GET["q"] : '';
$kategori = (isset($_GET['kategori']) && is_string($_GET['kategori'])) ? $_GET['kategori'] : '';

$xmlDoc = new DOMDocument();
if ($q === '' || !$xmlDoc->load($q)) {
    die('Gagal memuat RSS feed.');
}

$channel = $xmlDoc->getElementsByTagName('channel')->item(0);
if ($channel === null) {
    die('RSS feed tidak memiliki elemen channel.');
}

$channel_title = rss_teks_anak($channel, 'title');
$channel_link = rss_teks_anak($channel, 'link');
$channel_desc = rss_teks_anak($channel, 'description');

// Link and title are plain text and are escaped; the description is emitted
// unescaped, exactly as the feed delivers it.
echo("<p><a href='" . htmlspecialchars($channel_link, ENT_QUOTES, 'UTF-8') . "'>"
    . htmlspecialchars($channel_title, ENT_QUOTES, 'UTF-8') . "</a>");
echo("<br />");
echo($channel_desc . "</p>");

$x = $xmlDoc->getElementsByTagName('item');

// At most 201 items (indexes 0..200) are processed, but never more than the
// feed actually contains.
$jumlah_item = min($x->length, 201);
$kategori_sql = mysql_real_escape_string($kategori);

for ($i = 0; $i < $jumlah_item; $i++) {
    $item = $x->item($i);
    $item_title = rss_teks_anak($item, 'title');
    $item_link = rss_teks_anak($item, 'link');
    $item_desc = rss_teks_anak($item, 'description');

    echo ("<p><a href='" . htmlspecialchars($item_link, ENT_QUOTES, 'UTF-8') . "'>"
        . htmlspecialchars($item_title, ENT_QUOTES, 'UTF-8') . "</a>");
    echo ("<br />");
    echo ($item_desc . "</p>");

    // Stored form: backslashes removed and apostrophes replaced by spaces.
    $judul = str_replace("'", " ", stripslashes($item_title));
    $konten_html = str_replace("'", " ", stripslashes($item_desc));
    $teks = strip_tags($konten_html);

    // Every value is escaped exactly once, immediately before it enters SQL.
    $judul_sql = mysql_real_escape_string($judul);

    $cekdata = "select judul from tabel_konten_temp where judul='$judul_sql'";
    $ada = mysql_query($cekdata) or die(mysql_error());
    if (mysql_num_rows($ada) > 0) {
        continue; // a row with this title already exists
    }

    if (strlen($teks) <= 200) {
        continue; // too little text to be worth storing
    }

    $url_sql = mysql_real_escape_string($item_link);
    $html_sql = mysql_real_escape_string($konten_html);
    $teks_sql = mysql_real_escape_string($teks);

    mysql_query("INSERT INTO tabel_konten_temp(url, judul, konten_html, konten_teks, kategori) VALUES('$url_sql', '$judul_sql', '$html_sql', '$teks_sql', '$kategori_sql')") or die(mysql_error());
}
?>
2.
porterstemmer.php
/**
 * Stems a single word by running it through steps 1ab, 1c, 2, 3, 4 and 5 in
 * that order.
 *
 * @param string $word Word to stem.
 * @return string The stemmed word; words of at most two bytes are returned
 *                unchanged.
 */
public static function Stem($word)
{
    // Too short to carry a removable suffix.
    if (strlen($word) <= 2) {
        return $word;
    }

    $stem = self::step1c(self::step1ab($word));
    $stem = self::step3(self::step2($stem));

    return self::step5(self::step4($stem));
}
/**
 * Step 1a/1b: strips plural endings, then "eed", "ed" and "ing".
 *
 * @param string $word Word to process.
 * @return string The word after step 1a and 1b.
 */
private static function step1ab($word)
{
    // Part a: plural endings. The first matching suffix wins; 'ss' => 'ss'
    // exists only to stop a word ending in "ss" from losing its last "s".
    if (substr($word, -1) == 's') {
        self::replace($word, 'sses', 'ss')
            OR self::replace($word, 'ies', 'i')
            OR self::replace($word, 'ss', 'ss')
            OR self::replace($word, 's', '');
    }

    // Part b: when the word ends in "eed" that rule is the only one tried;
    // otherwise "ing" / "ed" are removed if the remaining stem has a vowel.
    if (substr($word, -2, 1) != 'e' OR !self::replace($word, 'eed', 'ee', 0)) {
        $v = self::$regex_vowel;

        // && binds tighter than OR, so each vowel test guards its own suffix.
        if (preg_match("#$v+#", substr($word, 0, -3)) && self::replace($word, 'ing', '')
            OR preg_match("#$v+#", substr($word, 0, -2)) && self::replace($word, 'ed', '')) {

            // Tidy up the stem left behind by the removal above.
            if (!self::replace($word, 'at', 'ate')
                AND !self::replace($word, 'bl', 'ble')
                AND !self::replace($word, 'iz', 'ize')) {

                if (self::doubleConsonant($word)
                    AND substr($word, -2) != 'll'
                    AND substr($word, -2) != 'ss'
                    AND substr($word, -2) != 'zz') {
                    // Collapse a doubled final consonant (except ll, ss, zz).
                    $word = substr($word, 0, -1);
                } else if (self::m($word) == 1 AND self::cvc($word)) {
                    $word .= 'e';
                }
            }
        }
    }

    return $word;
}

/**
 * Step 1c: turns a final "y" into "i" when the rest of the word contains a
 * vowel.
 *
 * @param string $word Word to process.
 * @return string The word after step 1c.
 */
private static function step1c($word)
{
    $v = self::$regex_vowel;

    if (substr($word, -1) == 'y' && preg_match("#$v+#", substr($word, 0, -1))) {
        self::replace($word, 'y', 'i');
    }

    return $word;
}
/**
 * Step 2: rewrites one suffix, chosen by the word's penultimate letter.
 *
 * For the matching letter the rules are tried in order and the first suffix
 * that is present ends the search, whether or not its measure condition
 * (m > 0) allowed the rewrite.
 *
 * @param string $word Word to process.
 * @return string The word after step 2.
 */
private static function step2($word)
{
    // penultimate letter => ordered list of (suffix, replacement)
    $aturan = array(
        'a' => array(array('ational', 'ate'), array('tional', 'tion')),
        'c' => array(array('enci', 'ence'), array('anci', 'ance')),
        'e' => array(array('izer', 'ize')),
        'g' => array(array('logi', 'log')),
        'l' => array(
            array('entli', 'ent'),
            array('ousli', 'ous'),
            array('alli', 'al'),
            array('bli', 'ble'),
            array('eli', 'e'),
        ),
        'o' => array(array('ization', 'ize'), array('ation', 'ate'), array('ator', 'ate')),
        's' => array(
            array('iveness', 'ive'),
            array('fulness', 'ful'),
            array('ousness', 'ous'),
            array('alism', 'al'),
        ),
        't' => array(array('biliti', 'ble'), array('aliti', 'al'), array('iviti', 'ive')),
    );

    $kunci = substr($word, -2, 1);
    if (isset($aturan[$kunci])) {
        foreach ($aturan[$kunci] as $pasangan) {
            if (self::replace($word, $pasangan[0], $pasangan[1], 0)) {
                break;
            }
        }
    }

    return $word;
}
/**
 * Step 3: rewrites or removes one suffix, chosen by the word's penultimate
 * letter. Every rule requires the remaining stem to have measure m > 0.
 *
 * @param string $word Word to process.
 * @return string The word after step 3.
 */
private static function step3($word)
{
    switch (substr($word, -2, 1)) {
        case 'a':
            self::replace($word, 'ical', 'ic', 0);
            break;

        case 's':
            self::replace($word, 'ness', '', 0);
            break;

        case 't':
            // These two rules of Porter's step 3 were missing, leaving the
            // case empty.
            self::replace($word, 'icate', 'ic', 0)
                OR self::replace($word, 'iciti', 'ic', 0);
            break;

        case 'u':
            self::replace($word, 'ful', '', 0);
            break;

        case 'v':
            self::replace($word, 'ative', '', 0);
            break;

        case 'z':
            self::replace($word, 'alize', 'al', 0);
            break;
    }

    return $word;
}
/**
 * Step 4: removes one suffix, chosen by the word's penultimate letter. Every
 * rule requires the remaining stem to have measure m > 1.
 *
 * @param string $word Word to process.
 * @return string The word after step 4.
 */
private static function step4($word)
{
    switch (substr($word, -2, 1)) {
        case 'a':
            self::replace($word, 'al', '', 1);
            break;

        case 'c':
            self::replace($word, 'ance', '', 1)
                OR self::replace($word, 'ence', '', 1);
            break;

        case 'e':
            self::replace($word, 'er', '', 1);
            break;

        case 'i':
            self::replace($word, 'ic', '', 1);
            break;

        case 'l':
            self::replace($word, 'able', '', 1)
                OR self::replace($word, 'ible', '', 1);
            break;

        case 'n':
            self::replace($word, 'ant', '', 1)
                OR self::replace($word, 'ement', '', 1)
                OR self::replace($word, 'ment', '', 1)
                OR self::replace($word, 'ent', '', 1);
            break;

        case 'o':
            // "ion" is removed only after "s" or "t". Both tests must look at
            // the last four characters (offset -4, not 4).
            if (substr($word, -4) == 'tion' OR substr($word, -4) == 'sion') {
                self::replace($word, 'ion', '', 1);
            } else {
                self::replace($word, 'ou', '', 1);
            }
            break;

        case 's':
            self::replace($word, 'ism', '', 1);
            break;

        case 't':
            self::replace($word, 'ate', '', 1)
                OR self::replace($word, 'iti', '', 1);
            break;

        case 'u':
            self::replace($word, 'ous', '', 1);
            break;

        case 'v':
            // Needs its own rule; falling through to 'z' could never match.
            self::replace($word, 'ive', '', 1);
            break;

        case 'z':
            self::replace($word, 'ize', '', 1);
            break;
    }

    return $word;
}
/**
 * Step 5: drops a final "e" and collapses a final double "l".
 *
 * @param string $word Word to process.
 * @return string The word after step 5.
 */
private static function step5($word)
{
    // Part a: a final "e" goes when the stem before it has m > 1, or m == 1
    // and the stem does not end consonant-vowel-consonant.
    if (substr($word, -1) == 'e') {
        $batang = substr($word, 0, -1);
        $ukuran = self::m($batang);

        if ($ukuran > 1 || ($ukuran == 1 && !self::cvc($batang))) {
            self::replace($word, 'e', '');
        }
    }

    // Part b: "ll" becomes "l" when m > 1.
    if (self::m($word) > 1 && self::doubleConsonant($word) && substr($word, -1) == 'l') {
        $word = substr($word, 0, -1);
    }

    return $word;
}
/**
 * Replaces the suffix $check of $str with $repl, optionally only when the
 * stem in front of the suffix has a measure greater than $m.
 *
 * @param string   $str   Word, modified in place when the rewrite happens.
 * @param string   $check Suffix to look for.
 * @param string   $repl  Replacement suffix.
 * @param int|null $m     Measure the stem must exceed; null means no condition.
 * @return bool True when $str ends in $check (even if the measure condition
 *              prevented the rewrite), false otherwise.
 */
private static function replace(&$str, $check, $repl, $m = null)
{
    $panjang = strlen($check);

    if (substr($str, -$panjang) != $check) {
        return false;
    }

    $pangkal = substr($str, 0, -$panjang);
    if ($m === null || self::m($pangkal) > $m) {
        $str = $pangkal . $repl;
    }

    return true;
}
/**
 * Counts the vowel-run + consonant-run sequences in $str after removing any
 * leading consonant run and any trailing vowel run.
 *
 * @param string $str Word or stem to measure.
 * @return int Number of sequences found.
 */
private static function m($str)
{
    $c = self::$regex_consonant;
    $v = self::$regex_vowel;

    // Trim the leading consonants first, then the trailing vowels.
    $inti = preg_replace("#$v+$#", '', preg_replace("#^$c+#", '', $str));

    preg_match_all("#($v+$c+)#", $inti, $matches);

    return count($matches[1]);
}
/**
 * Tells whether $str ends in two consonants that are the same letter.
 *
 * @param string $str Word or stem to test.
 * @return bool True when the last two characters match the consonant pattern
 *              and are equal.
 */
private static function doubleConsonant($str)
{
    $c = self::$regex_consonant;

    // Square-bracket offsets: the curly-brace form ($s{0}) is deprecated in
    // PHP 7.4 and a parse error in PHP 8.
    return preg_match("#$c{2}$#", $str, $matches)
        && $matches[0][0] == $matches[0][1];
}
/**
 * Tells whether $str ends in consonant-vowel-consonant where the final
 * consonant is not "w", "x" or "y".
 *
 * @param string $str Word or stem to test.
 * @return bool True when the ending has that shape.
 */
private static function cvc($str)
{
    $c = self::$regex_consonant;
    $v = self::$regex_vowel;

    // Square-bracket offsets: the curly-brace form ($s{2}) is deprecated in
    // PHP 7.4 and a parse error in PHP 8.
    return preg_match("#($c$v$c)$#", $str, $matches)
        && strlen($matches[1]) == 3
        && $matches[1][2] != 'w'
        && $matches[1][2] != 'x'
        && $matches[1][2] != 'y';
}
3.
stemming.php
/**
 * Extracts the most frequent stemmed keywords from a text.
 *
 * The text is lower-cased and split on whitespace. Each token is reduced to
 * its ASCII letters, dropped when it has fewer than three letters or is a stop
 * word, stemmed with PorterStemmer::Stem(), and filtered once more with the
 * same length and stop-word rules.
 *
 * @param string $page_word           Text to analyse.
 * @param int    $maks_keyword_return Maximum number of keywords to return.
 * @return array Map of stem => number of occurrences, ordered by descending
 *               count and limited to $maks_keyword_return entries.
 */
function stemming($page_word, $maks_keyword_return) {
    // NOTE(review): the list jumps from "edu" straight to "last"; the entries
    // in between look like they were lost - confirm against the original
    // stop-word source.
    $stop_words = array("a", "able", "about", "above", "abroad", "abst", "accordance", "according", "accordingly", "across", "act", "actually", "added", "adj", "adopted", "affected", "affecting", "affects", "after", "afterwards", "again", "against", "ago", "ah", "ahead", "ain't", "all", "allow", "allows", "almost", "alone", "along", "alongside", "already", "also", "although", "always", "am", "amid", "amidst", "among", "amongst", "amoungst", "amount", "an", "and", "announce", "another", "any", "anybody", "anyhow", "anymore", "anyone", "anything", "anyway", "anyways", "anywhere", "apart",
"apparently", "appear", "appreciate", "appropriate",
"approximately", "are", "aren", "arent", "aren't", "arise", "around", "as", "a's", "aside", "ask", "asking", "associated", "at", "auth", "available", "away", "awfully", "b", "back", "backward", "backwards", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "begin", "beginning", "beginnings", "begins", "behind", "being",
"believe", "below", "beside", "besides", "best", "better", "between", "beyond", "bill", "biol", "both", "bottom", "brief", "briefly", "but", "by", "c", "ca", "call", "came", "can",
"cannot", "cant", "can't", "caption", "cause", "causes", "certain", "certainly", "changes", "clearly", "c'mon", "co", "co.", "com", "come", "comes", "computer", "con", "concerning", "consequently", "consider", "considering", "contain",
"containing", "contains", "corresponding", "could", "couldnt", "couldn't", "course", "cry", "c's", "currently", "d", "dare", "daren't", "date", "de", "definitely", "describe", "described", "despite", "detail", "did", "didn't", "different", "directly", "do", "does", "doesn't", "doing", "done", "don't", "down", "downwards", "due", "during", "e", "each", "ed", "edu",
"last", "lately", "later", "latter", "latterly", "least", "less", "lest", "let", "lets", "let's", "like", "liked", "likely", "likewise", "line", "little", "'ll", "look", "looking", "looks", "low", "lower", "ltd", "m", "made", "mainly", "make", "makes", "many", "may", "maybe", "mayn't", "me", "mean", "means", "meantime", "meanwhile", "merely", "mg", "might", "mightn't", "mill", "million", "mine", "minus",
"miss", "ml", "more", "moreover", "most", "mostly", "move", "mr", "mrs", "much", "mug", "must", "mustn't", "my", "myse”", "myself", "n", "na", "name", "namely", "nay", "nd", "near", "nearly", "necessarily", "necessary", "need", "needn't", "needs", "neither", "never", "neverf", "neverless", "nevertheless", "new", "next", "nine", "ninety", "no", "nobody", "non", "none", "nonetheless", "noone", "no-one", "nor", "normally", "nos", "not", "noted", "nothing",
"notwithstanding", "novel", "now", "nowhere", "o", "obtain", "obtained", "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "omitted", "on", "once", "one", "ones", "one's", "only", "onto", "opposite", "or", "ord", "other", "others", "otherwise", "ought", "oughtn't", "our", "ours", "ours ", "ourselves", "out", "outside", "over", "overall", "owing", "own", "p", "page", "pages", "part", "particular",
"particularly", "past", "per", "perhaps", "placed", "please", "plus", "poorly", "possible", "possibly", "potentially", "pp", "predominantly", "present", "presumably", "previously",
"primarily", "probably", "promptly", "proud", "provided", "provides", "put", "q", "que", "quickly", "quite", "qv", "r", "ran", "rather", "rd", "re", "readily", "really", "reasonably", "recent", "recently", "ref", "refs", "regarding", "regardless", "regards", "related", "relatively", "research", "respectively", "resulted", "resulting", "results", "right", "round", "run", "s", "said", "same", "saw", "say", "saying", "says", "sec", "second", "secondly", "section", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "self", "selves",
"sensible", "sent", "serious", "seriously", "seven", "several", "shall", "shan't", "she", "shed", "she'd", "she'll", "shes", "she's", "should", "shouldn't", "show", "showed", "shown", "showns", "shows", "side", "significant", "significantly", "similar", "similarly", "since", "sincere", "six", "sixty", "slightly", "so", "some", "somebody", "someday", "somehow", "someone", "somethan", "something", "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry", "specifically", "specified", "specify", "specifying", "state", "states", "still", "stop", "strongly", "sub", "substantially", "successfully", "such", "sufficiently", "suggest", "sup", "sure", "system", "t", "take", "taken", "taking", "tell", "ten", "tends", "th", "than", "thank", "thanks", "thanx", "that", "that'll", "thats", "that's", "that've", "the", "their", "theirs", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "thered", "there'd", "therefore", "therein", "there'll", "thereof", "therere", "there're", "theres", "there's", "thereto", "thereupon", "there've", "these", "they", "theyd", "they'd", "they'll", "theyre", "they're", "they've", "thick", "thin", "thing", "things", "think", "third", "thirty", "this", "thorough",
"thoroughly", "those", "thou", "though", "thoughh", "thousand", "three", "throug", "through", "throughout", "thru", "thus", "til", "till", "tip", "to", "together", "too", "took", "top", "toward", "towards", "tried", "tries", "truly", "try",
"un", "under", "underneath", "undoing", "unfortunately",
"unless", "unlike", "unlikely", "until", "unto", "up", "upon", "ups", "upwards", "us", "use", "used", "useful", "usefully", "usefulness", "uses", "using", "usually", "v", "value", "various", "'ve", "versus", "very", "via", "viz", "vol", "vols", "vs", "w", "want", "wants", "was", "wasn't", "way", "we", "wed", "we'd", "welcome", "well", "we'll", "went", "were", "we're", "weren't", "we've", "what", "whatever", "what'll", "whats", "what's", "what've", "when", "whence", "whenever", "when's", "where", "whereafter", "whereas", "whereby", "wherein", "wheres", "where's", "whereupon",
"wherever", "whether", "which", "whichever", "while", "whilst", "whim", "whither", "who", "whod", "who'd", "whoever", "whole", "who'll", "whom", "whomever", "whos", "who's", "whose", "why", "why's", "widely", "will", "willing", "wish", "with", "within", "without", "wonder", "won't", "words", "world", "would",
"wouldn't", "www", "x", "y", "yes", "yet", "you", "youd", "you'd", "you'll", "your", "youre", "you're", "yours", "yourself", "yourselves", "you've", "z", "zero");

    // Hash set for constant-time lookups. Each stop word is also stored in
    // its letters-only form, because tokens are reduced to letters before the
    // lookup and entries such as "don't" could otherwise never match.
    $stop_lookup = array();
    foreach ($stop_words as $stop_word) {
        $stop_lookup[$stop_word] = true;
        $stop_lookup[ambil_abjad(strtolower($stop_word))] = true;
    }

    // Split on any whitespace so words separated by newlines or tabs are not
    // fused into one token once the non-letters are stripped.
    $words = preg_split('/\s+/', strtolower($page_word), -1, PREG_SPLIT_NO_EMPTY);

    $stem_words = array();
    foreach ($words as $word) {
        $word = hapus_2_karakter(ambil_abjad($word));
        if ($word === "" || isset($stop_lookup[$word])) {
            continue;
        }

        // Stemming can shorten a word or turn it into a stop word, so both
        // filters are applied again to the stem.
        $stem = hapus_2_karakter(PorterStemmer::Stem($word));
        if ($stem === "" || isset($stop_lookup[$stem])) {
            continue;
        }

        $stem_words[] = $stem;
    }

    $hitung_keyword = array_count_values($stem_words);
    arsort($hitung_keyword);

    return array_slice($hitung_keyword, 0, $maks_keyword_return);
}
/**
 * Removes every character that is not an ASCII letter (A-Z, a-z).
 *
 * @param string $kata Input text.
 * @return string The input with only its ASCII letters left.
 */
function ambil_abjad($kata)
{
    $hanya_huruf = preg_replace('/[^A-Za-z]/', '', $kata);

    return $hanya_huruf;
}
/**
 * Discards words of one or two bytes.
 *
 * @param string $kata Word to check.
 * @return string The word itself when it is longer than two bytes, otherwise
 *                an empty string.
 */
function hapus_2_karakter($kata)
{
    return strlen($kata) > 2 ? $kata : "";
}
4.
hitungrelevansi.php
<?php
/**
 * Computes a relevance score between every row of tabel_keyword ("topik") and
 * every other row ("page"), and passes each non-zero score to
 * insert_relevansi().
 *
 * For a pair, each keyword count is divided by the value wmaks() returns for
 * its keyword list. The score is
 *   (sum of topik weights of shared keywords * sum of page weights of shared
 *    keywords) / sqrt((sum of all topik weights)^2 * (sum of all page weights)^2).
 */
$query_topik = mysql_query("SELECT * FROM tabel_keyword") or die(mysql_error());
while ($row_topik = mysql_fetch_array($query_topik)) {
    $id_topik = $row_topik['id_konten'];
    // NOTE(review): unserialize() must only ever see data written by this
    // application; confirm nothing else can write to tabel_keyword.keyword.
    $keyword_topiks = unserialize($row_topik['keyword']);
    if (!is_array($keyword_topiks)) {
        continue; // unreadable keyword list, nothing to compare
    }

    $query_page = mysql_query("SELECT * FROM tabel_keyword WHERE id_konten != $id_topik ORDER BY id_konten ASC") or
        die(mysql_error());
    while ($row_page = mysql_fetch_array($query_page)) {
        $id_page = $row_page['id_konten'];
        $keyword_pages = unserialize($row_page['keyword']);
        if (!is_array($keyword_pages)) {
            continue;
        }

        $wmax_topik = wmaks($keyword_topiks);
        $wmax_page = wmaks($keyword_pages);
        if ($wmax_topik == 0 || $wmax_page == 0) {
            continue; // every weight below is divided by these values
        }

        // All per-pair state is reset here so nothing carries over from the
        // previous pair.
        $wkt_sama = 0;
        $wkp_sama = 0;
        $keyword_topik_sama = "";
        $keyword_page_sama = "";
        $keyword_topik = "";
        $keyword_page = "";
        $relevansi_topik_page = 0;

        // Accumulate the weights of the keywords both lists share.
        foreach ($keyword_topiks as $key_topik => $jumlah_topik) {
            foreach ($keyword_pages as $key_page => $jumlah_page) {
                if (trim($key_topik) == trim($key_page) &&
                    trim($key_page) != "" && trim($key_topik) != "") {
                    $wkt_sama = ($jumlah_topik / $wmax_topik) + $wkt_sama;
                    $wkp_sama = ($jumlah_page / $wmax_page) + $wkp_sama;
                    $keyword_topik_sama = $keyword_topik_sama . " " . $key_topik . "=" . $jumlah_topik;
                    $keyword_page_sama = $keyword_page_sama . " " . $key_page . "=" . $jumlah_page;
                }
            }
        }

        // Total weight of the topik keywords.
        $wkt = 0;
        foreach ($keyword_topiks as $key_topik => $jumlah_topik) {
            $wkt = $wkt + ($jumlah_topik / $wmax_topik);
            // Readable "keyword=count" list of the topik keywords.
            $keyword_topik = $key_topik . "=" . $jumlah_topik . " " . $keyword_topik;
        }
        $wkt_pangkat = pow($wkt, 2);

        // Total weight of the page keywords.
        $wkp = 0;
        foreach ($keyword_pages as $key_page => $jumlah_page) {
            $wkp = $wkp + ($jumlah_page / $wmax_page);
            // Readable "keyword=count" list of the page keywords.
            $keyword_page = $key_page . "=" . $jumlah_page . " " . $keyword_page;
        }
        $wkp_pangkat = pow($wkp, 2);

        $akar_wkt_pangkat_dan_wkp_pangkat = sqrt($wkt_pangkat * $wkp_pangkat);

        // Avoid division by zero; the score then stays 0 for this pair.
        if ($akar_wkt_pangkat_dan_wkp_pangkat != 0) {
            $relevansi_topik_page = ($wkt_sama * $wkp_sama) / $akar_wkt_pangkat_dan_wkp_pangkat;
        }

        // Only pairs with a non-zero score are stored. A non-zero score
        // implies both shared-weight sums are non-zero, so no further check
        // on them is needed.
        if ($relevansi_topik_page != 0) {
            insert_relevansi($id_topik, $id_page, $keyword_topik_sama, $keyword_page_sama, $wkt_sama, $wkp_sama, $wkt, $wkp, $relevansi_topik_page);
        }
    }
}
?>
5.
pencarian.php
<?php
/**
 * Keyword search over tabel_konten within one category, with paging.
 *
 * Query parameters read: "cari" (space-separated search terms), "kategori"
 * (category to search in) and "page" (1-based page number).
 *
 * Leaves $cari, $kategori, $page and $pages set for the pagination links that
 * follow this block.
 */
include 'crawler1/PorterStemmer.php';

$cari = (isset($_GET['cari']) && is_string($_GET['cari'])) ? $_GET['cari'] : '';
$kategori = (isset($_GET['kategori']) && is_string($_GET['kategori'])) ? $_GET['kategori'] : '';
$kategori_sql = mysql_real_escape_string($kategori);

// One "title or text contains the term" condition per search term.
$array_cari = explode(" ", strtolower($cari));
$syarat = array();
foreach ($array_cari as $kata) {
    if ($kata === '') {
        continue; // repeated spaces would otherwise add a match-all term
    }
    // Neutralise LIKE wildcards first, then escape for the SQL string.
    $kata_sql = mysql_real_escape_string(addcslashes($kata, '\\%_'));
    $syarat[] = "judul LIKE '%$kata_sql%' OR konten_teks LIKE '%$kata_sql%'";
}
if (count($syarat) == 0) {
    // An empty search lists everything in the category.
    $syarat[] = "judul LIKE '%%' OR konten_teks LIKE '%%'";
}

// The parentheses keep the OR chain from swallowing the category filter
// (AND binds tighter than OR).
$q_cari = '(' . implode(' OR ', $syarat) . ')';
$where = "$q_cari AND kategori='$kategori_sql'";

$per_page = 10;
$q = "SELECT count(*) FROM tabel_konten WHERE $where";
$page_query = mysql_query($q) or die(mysql_error());
$pages = ceil(mysql_result($page_query, 0) / $per_page);
$page = (isset($_GET['page'])) ? max(1, (int) $_GET['page']) : 1;
$start = ($page - 1) * $per_page;

// Same filter as the count above, so page numbers and listed rows agree.
$sql = mysql_query("SELECT * FROM tabel_konten WHERE $where LIMIT $start, $per_page") or die(mysql_error());
while ($row = mysql_fetch_array($sql)) {
    $id_konten = $row['id_konten'];
    // NOTE(review): the title is printed as markup (any <h3> in it is wrapped
    // in a link); confirm stored titles are trusted before relying on this.
    $judul = ucfirst(strtolower($row['judul']));
    $judul = str_replace('<h3>', '<h3><a href="baca.php?id=' . $id_konten . '">', $judul);
    $judul = str_replace('</h3>', '</a></h3>', $judul);
    echo "<a href='baca.php?id=$id_konten'>" . $judul . "</a><br/>";
}
?> <?php
for ($x = 1; $x <= $pages; $x++) {
echo ($x == $page) ? '<b><a href="?cari=' . $cari . '&kategori=' . $kategori . '&page=' . $x . '">' . $x . '</a></b>' : '<a href="?cari=' . $cari . '&kategori=' . $kategori . '&page=' . $x . '">' . $x . '</a> ';