PROGRAM LISTING
1. Install the textblob, Sastrawi, nltk, and swifter libraries
2. Import nltk and download the punkt and stopwords packages from nltk
3. Import the packages needed for the analysis and read the dataset from Google Drive using pandas
!pip install textblob
!pip install Sastrawi
!pip install nltk
!pip install swifter
import nltk
nltk.download('punkt')
nltk.download('stopwords')
import pandas as pd
import numpy as np
import swifter
import string, re, json
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
file = '1DissDIySHLAQT_ofE69SsWE_p6IIfVTv'
url = f'https://drive.google.com/uc?id={file}'
df = pd.read_csv(url, sep="|")
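A quick sanity check that the dataset loaded as expected; the Tweet and Label columns referenced later in the listing should be present:

print(df.shape)
print(df[['Tweet', 'Label']].head())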
4. Create functions for the text preprocessing steps
def cleanTxt(text):
    text = re.sub(r'https?:\/\/\S+', '', text)   # remove URLs
    text = re.sub(r'@[A-Za-z0-9]+', '', text)    # remove mentions
    text = re.sub(r'RT[\s]+', '', text)          # remove retweet markers
    text = re.sub(r'#+', '', text)               # remove hashtag symbols
    text = re.sub(r'\d+', '', text)              # remove digits
    text = re.sub(r'\n|<br>', ' ', text)         # replace line breaks with spaces
    # remove HTML tags and character entities
    text = re.sub(r'<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});', '', text)
    # collapse characters repeated two or more times into one
    text = re.sub(r'(.)\1{1,}', r'\1', text)
    text = text.translate(str.maketrans("", "", string.punctuation))  # remove punctuation
    text = text.lower()
    text = text.encode('ascii', 'ignore').decode('utf-8')  # drop non-ASCII characters
    text = re.sub(r'[^\x00-\x7f]', r'', text)
    text = text.strip()  # strip leading/trailing whitespace
    # normalize slang using a JSON dictionary of slang -> standard words
    with open("/content/slang_words.txt") as f:
        slang_string = f.read()
    slang_word = json.loads(slang_string)
    text = " ".join(slang_word.get(word, word) for word in text.split(' '))
    return text
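cleanTxt() expects slang_words.txt to hold a single JSON object mapping slang tokens to their standard forms. A minimal sketch of how such a file could be produced; the entries below are illustrative, not the project's actual dictionary:

# Hypothetical slang dictionary; real entries depend on the project's word list.
sample_slang = {"gk": "tidak", "yg": "yang", "bgt": "banget"}
with open("/content/slang_words.txt", "w") as f:
    json.dump(sample_slang, f)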
def stopword(text):
    tags = word_tokenize(text)
    wordlist = []
    stop_words = set(stopwords.words('indonesian'))  # build the set once, not per token
    for word in tags:
        if word not in stop_words:
            wordlist.append(word)
    text = " ".join(wordlist)
    return text
def stemming(text):
    factory = StemmerFactory()
    stemmer = factory.create_stemmer()  # Sastrawi Indonesian stemmer
    text = stemmer.stem(text)
    return text
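A quick end-to-end check of the three functions on a made-up tweet; the exact output depends on the slang dictionary and NLTK's Indonesian stopword list:

sample = "RT @user: Pelayanannya bagus bangettt!! https://t.co/xyz #puas"
print(stemming(stopword(cleanTxt(sample))))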
5. Copy the dataframe and apply the text preprocessing functions
6. Convert the label column to numeric values
7. Apply TF-IDF to the dataset
8. Implement the Support Vector Machine method
df_copy = df.copy()
df_copy['cleaned'] = df_copy['Tweet'].apply(cleanTxt)
df_copy['stopword'] = df_copy['cleaned'].apply(stopword)
df_copy['Preprocess'] = df_copy['stopword'].swifter.apply(stemming)
df_new_copy = df_copy[['Preprocess', 'Label']].copy()  # copy to avoid SettingWithCopyWarning
df_new_copy['Label'] = [0 if x == "Negative" else 1 for x in df_new_copy['Label']]
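Optionally verify the encoding before modeling; value_counts shows how many tweets fall into each class:

# 0 = Negative, 1 = Positive (per the comprehension above)
print(df_new_copy['Label'].value_counts())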
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

x_train, x_test, y_train, y_test = train_test_split(
    df_new_copy['Preprocess'], df_new_copy['Label'],
    test_size=0.2, random_state=0)
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(x_train)  # fit IDF weights on training data only
X_test = vectorizer.transform(x_test)        # reuse the fitted vocabulary on the test split
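Both splits now live in the same TF-IDF feature space; a quick inspection, assuming scikit-learn 1.0+ for get_feature_names_out (illustrative only):

print(X_train.shape, X_test.shape)  # sparse matrices: (n_documents, n_terms)
print(len(vectorizer.get_feature_names_out()))  # vocabulary size learned from x_train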
from sklearn import svm
from sklearn.model_selection import cross_val_score

clf = svm.SVC(kernel='linear')
classifier = clf.fit(X_train, y_train)
predict = classifier.predict(X_test)
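cross_val_score is imported above but never called in the listing; a minimal sketch of how it could be used for 5-fold cross-validation on the training split (the default scoring for SVC is accuracy):

scores = cross_val_score(svm.SVC(kernel='linear'), X_train, y_train, cv=5)
print(scores.mean(), scores.std())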
9. Evaluate the model
10. Visualize a word cloud
from sklearn.metrics import f1_score, recall_score, precision_score, confusion_matrix, accuracy_score

tn, fp, fn, tp = confusion_matrix(y_test, predict).ravel()
print(tn, fp, fn, tp)
print("f1 score of the predictions: {:.2f}%".format(f1_score(y_test, predict) * 100))
print("accuracy score of the predictions: {:.2f}%".format(accuracy_score(y_test, predict) * 100))
print("precision score of the predictions: {:.2f}%".format(precision_score(y_test, predict) * 100))
print("recall score of the predictions: {:.2f}%".format(recall_score(y_test, predict) * 100))
from wordcloud import WordCloud
import matplotlib.pyplot as plt

all_text_cleaned = ' '.join(word for word in df_new_copy['Preprocess'])
wordcloud_cleaned = WordCloud(width=1000, height=1000, mode='RGBA',
                              background_color='white').generate(all_text_cleaned)
plt.figure(figsize=(20, 10))
plt.imshow(wordcloud_cleaned, interpolation='bilinear')
plt.axis("off")
plt.margins(x=0, y=0)
plt.show()
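Step 11 below loads a pickled model file that this listing never writes. A minimal sketch of how it could be produced, assuming the served object is the fitted classifier saved alongside the vectorizer; the model file name matches the Flask code, but the actual saved object and the tfidf_vectorizer.sav name are assumptions:

import pickle
# Hypothetical persistence step; the real saved object may differ.
pickle.dump(classifier, open("nlp_model_indh.sav", "wb"))
pickle.dump(vectorizer, open("tfidf_vectorizer.sav", "wb"))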
11. Serve the model with a Flask API
import os
import json
import pickle
from flask import Flask, request, jsonify
from common.helper import preprocess_data
from flask_cors import CORS, cross_origin

app = Flask(__name__)
cors = CORS(app)
app.config['CORS_HEADERS'] = 'Content-Type'

# load the pickled sentiment model relative to this file
root_dir = os.path.dirname(os.path.abspath(__file__))
model = pickle.load(open(root_dir + "/model/nlp_model_indh.sav", "rb"))

@app.route('/model', methods=['GET', 'POST'])
@cross_origin()
def predict():
    reqBody = json.loads(request.data)
    string = reqBody['string']
    out = model.predict(preprocess_data(string))
    if out[0] == 0:
        result = "Negative"
    else:
        result = "Positive"
    print(result)
    return jsonify({"sentiment": result})

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=True)
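common.helper.preprocess_data is not included in the listing; presumably it applies the same cleaning and TF-IDF transform before prediction. An example client call against the running server; the payload key 'string' matches the handler above, and the sample sentence is illustrative:

import requests
resp = requests.post("http://localhost:5000/model",
                     json={"string": "pelayanan sangat memuaskan"})
print(resp.json())  # e.g. {"sentiment": "Positive"}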