# Import the libraries
# (1)  -- section marker carried over from the source document

import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import seaborn as sns  # plot tools

# Select the uploaded file
uploaded_file = 'TCS1.csv'  # Replace with the name of the uploaded file

# Read the file, using the 'Date' column as the row index
Prgoo = pd.read_csv(uploaded_file, sep=',', index_col='Date')

# prices (Close) is the value to predict (y);
# initial (Open) is the independent variable (X)
prices = Prgoo['Close'].tolist()
initial = Prgoo['Open'].tolist()

# Reshape each list into an (n, 1) column array
prices = np.reshape(prices, (len(prices), 1))
initial = np.reshape(initial, (len(initial), 1))

# Preview the first five rows (only rendered in an interactive/notebook session)
Prgoo.head(5)

# (2)  -- section marker carried over from the source document

# Imports are placed before first use; plt was previously referenced
# before it had been imported in this section.
import matplotlib.pyplot as plt
import seaborn as sns

# Line plot of the 'Open' column
Prgoo[['Open']].plot()
plt.title('Open Price')
plt.show()

# Line plot of the 'Close' column
Prgoo[['Close']].plot()
plt.title('Close Price')
plt.show()

# Use sns.displot for Open
# NOTE(review): displot is a figure-level function, so each call below
# opens its own figure; the ylabel/legend calls that follow therefore act
# only on the most recent (Close) figure -- confirm this is intended.
sns.displot(Prgoo['Open'], kde=True, linewidth=5, label='Open')

# Use sns.displot for Close
sns.displot(Prgoo['Close'], kde=True, linewidth=3, label='Close')

# Add the y-axis label
plt.ylabel('Density')

# Show the legend
plt.legend()

# Show the plot
plt.show()

# (3)  -- section marker carried over from the source document

# Import the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Select the uploaded file
uploaded_file = 'TCS1.csv'  # Replace with the name of the uploaded file

# Read the file, using the 'Date' column as the row index
Prgoo = pd.read_csv(uploaded_file, sep=',', index_col='Date')

# Handling missing values (NaN) in the dataset: drop incomplete rows in place
Prgoo.dropna(inplace=True)

# prices (Close) is the value to predict (y);
# initial (Open) is the independent variable (X)
prices = Prgoo['Close'].tolist()
initial = Prgoo['Open'].tolist()

# Reshape each list into an (n, 1) column array
prices = np.reshape(prices, (len(prices), 1))
initial = np.reshape(initial, (len(initial), 1))

# Splitting the dataset into the Training set and Test set
# (20 % held out for testing; random_state fixed so the split is repeatable)
prices_train, prices_test, initial_train, initial_test = train_test_split(
    prices, initial, test_size=0.2, random_state=0
)

# Names used by the modelling and plotting steps below, re-imported here in
# the same per-section style as the rest of this script.
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Initializing the Linear Regression model
regressor = LinearRegression()

# Training the model
# (4)  -- section marker carried over from the source document
regressor.fit(initial_train, prices_train)

# Predicting the Test set results
prices_pred = regressor.predict(initial_test)

# R-squared on the Test set. Computed once: the inputs do not change
# between the three places where the score is reported below.
r2 = r2_score(prices_test, prices_pred)

# Print R-squared for Test set
print(f'Test-set R2 score: {r2}')

# Visualizing the Linear Regression line on Training set
plt.scatter(
    initial_train, prices_train, color='red', label='Actual Price'
)  # plotting the initial datapoints
plt.plot(
    initial_train,
    regressor.predict(initial_train),
    color='blue',
    linewidth=3,
    label='Predicted Price',
)  # plotting the line made by linear regression
plt.title('Linear Regression price | Open vs. Close')
plt.xlabel('Price')
plt.ylabel('')
plt.legend()
plt.show()

# Print R-squared for Test set
print(f'Test-set R2 score: {r2}')

# Visualizing the Test set results
plt.scatter(initial_test, prices_test, color='red')
plt.plot(initial_test, prices_pred, color='blue')
# NOTE(review): the second plt.title call replaces the first, so the
# '(Test set)' title is never shown -- decide which title is wanted.
plt.title('Open vs Close (Test set)')
plt.title('Linear Regression price | Open vs. Close')
plt.xlabel('Price')
plt.ylabel('')
plt.show()

# Evaluating the model
# (5)  -- section marker carried over from the source document
print(f'R-squared value: {r2}')

# Visualizing the Actual vs Predicted Prices
plt.figure(figsize=(8, 6))

# Scatter plot for Actual vs. Predicted values
plt.scatter(prices_test, prices_pred, color='blue')
plt.plot(
    prices_test, prices_test, color='red', linewidth=2, label='Actual Price'
)  # Plotting the diagonal line for actual values
plt.title('Actual vs Predicted Prices')
plt.xlabel('Actual Price')
plt.ylabel('Predicted Price')
plt.legend()
plt.grid(True)
plt.show()