PROGRAM LISTING
1. Install the textblob, Sastrawi, nltk, and swifter libraries
2. Import nltk and download the punkt and stopwords packages from nltk
3. Import the packages needed for the analysis and read the dataset from Google Drive using pandas
!pip install textblob
!pip install Sastrawi
!pip install nltk
!pip install swifter
import nltk
nltk.download('punkt')
nltk.download('stopwords')
import pandas as pd
import numpy as np
import swifter
import string, re, json
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
file = '1DissDIySHLAQT_ofE69SsWE_p6IIfVTv'
url = f'https://drive.google.com/uc?id={file}'
df = pd.read_csv(url, sep="|")
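A quick sanity check that the dataset loaded as expected; the Tweet and Label columns referenced later in the listing should be present:

print(df.shape)
print(df[['Tweet', 'Label']].head())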
4. Create functions for the text preprocessing steps
def cleanTxt(text):
    text = re.sub(r'https?:\/\/\S+', '', text)   # remove URLs
    text = re.sub(r'@[A-Za-z0-9]+', '', text)    # remove mentions
    text = re.sub(r'RT[\s]+', '', text)          # remove retweet markers
    text = re.sub(r'#+', '', text)               # remove hashtag symbols
    text = re.sub(r'\d+', '', text)              # remove digits
    text = re.sub(r'\n|<br>', ' ', text)         # replace line breaks with spaces
    # remove HTML tags and character entities
    text = re.sub(r'<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});', '', text)
    # collapse characters repeated two or more times into one
    text = re.sub(r'(.)\1{1,}', r'\1', text)
    text = text.translate(str.maketrans("", "", string.punctuation))  # remove punctuation
    text = text.lower()
    text = text.encode('ascii', 'ignore').decode('utf-8')  # drop non-ASCII characters
    text = re.sub(r'[^\x00-\x7f]', r'', text)
    text = text.strip()  # strip leading/trailing whitespace
    # normalize slang using a JSON dictionary of slang -> standard words
    with open("/content/slang_words.txt") as f:
        slang_string = f.read()
    slang_word = json.loads(slang_string)
    text = " ".join(slang_word.get(word, word) for word in text.split(' '))
    return text
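cleanTxt() expects slang_words.txt to hold a single JSON object mapping slang tokens to their standard forms. A minimal sketch of how such a file could be produced; the entries below are illustrative, not the project's actual dictionary:

# Hypothetical slang dictionary; real entries depend on the project's word list.
sample_slang = {"gk": "tidak", "yg": "yang", "bgt": "banget"}
with open("/content/slang_words.txt", "w") as f:
    json.dump(sample_slang, f)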
def stopword(text):
    tags = word_tokenize(text)
    wordlist = []
    stop_words = set(stopwords.words('indonesian'))  # build the set once, not per token
    for word in tags:
        if word not in stop_words:
            wordlist.append(word)
    text = " ".join(wordlist)
    return text
def stemming(text):
    factory = StemmerFactory()
    stemmer = factory.create_stemmer()  # Sastrawi Indonesian stemmer
    text = stemmer.stem(text)
    return text
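A quick end-to-end check of the three functions on a made-up tweet; the exact output depends on the slang dictionary and NLTK's Indonesian stopword list:

sample = "RT @user: Pelayanannya bagus bangettt!! https://t.co/xyz #puas"
print(stemming(stopword(cleanTxt(sample))))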
5. Copy the dataframe and apply the text preprocessing functions
6. Convert the label column to numeric values
7. Apply TF-IDF to the dataset
8. Implement the Support Vector Machine method
df_copy = df.copy()
df_copy['cleaned'] = df_copy['Tweet'].apply(cleanTxt)
df_copy['stopword'] = df_copy['cleaned'].apply(stopword)
df_copy['Preprocess'] = df_copy['stopword'].swifter.apply(stemming)
df_new_copy = df_copy[['Preprocess', 'Label']].copy()  # copy to avoid SettingWithCopyWarning
df_new_copy['Label'] = [0 if x == "Negative" else 1 for x in df_new_copy['Label']]
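Optionally verify the encoding before modeling; value_counts shows how many tweets fall into each class:

# 0 = Negative, 1 = Positive (per the comprehension above)
print(df_new_copy['Label'].value_counts())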
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

x_train, x_test, y_train, y_test = train_test_split(
    df_new_copy['Preprocess'], df_new_copy['Label'],
    test_size=0.2, random_state=0)
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(x_train)  # fit IDF weights on training data only
X_test = vectorizer.transform(x_test)        # reuse the fitted vocabulary on the test split
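Both splits now live in the same TF-IDF feature space; a quick inspection, assuming scikit-learn 1.0+ for get_feature_names_out (illustrative only):

print(X_train.shape, X_test.shape)  # sparse matrices: (n_documents, n_terms)
print(len(vectorizer.get_feature_names_out()))  # vocabulary size learned from x_train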
from sklearn import svm
from sklearn.model_selection import cross_val_score

clf = svm.SVC(kernel='linear')
classifier = clf.fit(X_train, y_train)
predict = classifier.predict(X_test)
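cross_val_score is imported above but never called in the listing; a minimal sketch of how it could be used for 5-fold cross-validation on the training split (the default scoring for SVC is accuracy):

scores = cross_val_score(svm.SVC(kernel='linear'), X_train, y_train, cv=5)
print(scores.mean(), scores.std())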
9. Evaluate the model
10. Visualize a word cloud
from sklearn.metrics import f1_score, recall_score, precision_score, confusion_matrix, accuracy_score

tn, fp, fn, tp = confusion_matrix(y_test, predict).ravel()
print(tn, fp, fn, tp)
print("f1 score of the predictions: {:.2f}%".format(f1_score(y_test, predict) * 100))
print("accuracy score of the predictions: {:.2f}%".format(accuracy_score(y_test, predict) * 100))
print("precision score of the predictions: {:.2f}%".format(precision_score(y_test, predict) * 100))
print("recall score of the predictions: {:.2f}%".format(recall_score(y_test, predict) * 100))
from wordcloud import WordCloud
import matplotlib.pyplot as plt

all_text_cleaned = ' '.join(word for word in df_new_copy['Preprocess'])
wordcloud_cleaned = WordCloud(width=1000, height=1000, mode='RGBA',
                              background_color='white').generate(all_text_cleaned)
plt.figure(figsize=(20, 10))
plt.imshow(wordcloud_cleaned, interpolation='bilinear')
plt.axis("off")
plt.margins(x=0, y=0)
plt.show()
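Step 11 below loads a pickled model file that this listing never writes. A minimal sketch of how it could be produced, assuming the served object is the fitted classifier saved alongside the vectorizer; the model file name matches the Flask code, but the actual saved object and the tfidf_vectorizer.sav name are assumptions:

import pickle
# Hypothetical persistence step; the real saved object may differ.
pickle.dump(classifier, open("nlp_model_indh.sav", "wb"))
pickle.dump(vectorizer, open("tfidf_vectorizer.sav", "wb"))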
11. Serve the model with a Flask API
import os
import json
import pickle
from flask import Flask, request, jsonify
from common.helper import preprocess_data
from flask_cors import CORS, cross_origin

app = Flask(__name__)
cors = CORS(app)
app.config['CORS_HEADERS'] = 'Content-Type'

# load the pickled sentiment model relative to this file
root_dir = os.path.dirname(os.path.abspath(__file__))
model = pickle.load(open(root_dir + "/model/nlp_model_indh.sav", "rb"))

@app.route('/model', methods=['GET', 'POST'])
@cross_origin()
def predict():
    reqBody = json.loads(request.data)
    string = reqBody['string']
    out = model.predict(preprocess_data(string))
    if out[0] == 0:
        result = "Negative"
    else:
        result = "Positive"
    print(result)
    return jsonify({"sentiment": result})

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=True)
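common.helper.preprocess_data is not included in the listing; presumably it applies the same cleaning and TF-IDF transform before prediction. An example client call against the running server; the payload key 'string' matches the handler above, and the sample sentence is illustrative:

import requests
resp = requests.post("http://localhost:5000/model",
                     json={"string": "pelayanan sangat memuaskan"})
print(resp.json())  # e.g. {"sentiment": "Positive"}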