# Import the libraries
import matplotlib.pyplot as plt
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns  # plot tools
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
# Choose the uploaded file.
uploaded_file = 'TCS1.csv'  # Replace with the name of the uploaded file

# Read the file, using the 'Date' column as the row index.
Prgoo = pd.read_csv(uploaded_file, sep=',', index_col='Date')

# 'Close' is the value to predict (dependent variable, y); 'Open' is the
# predictor (independent variable, X).
prices = Prgoo['Close'].tolist()
initial = Prgoo['Open'].tolist()

# Reshape each list into an (n, 1) column array: one row per observation,
# one column for the single feature/target.
prices = np.reshape(prices, (len(prices), 1))
initial = np.reshape(initial, (len(initial), 1))

# Preview the first five rows. A bare `Prgoo.head(5)` expression displays
# nothing when run as a script, so print it explicitly.
print(Prgoo.head(5))

# Line plot of the opening price over the index.
Prgoo[['Open']].plot()
plt.title('Open Price')
plt.show()

# Line plot of the closing price over the index.
Prgoo[['Close']].plot()
plt.title('Close Price')
plt.show()
import seaborn as sns
import matplotlib.pyplot as plt

# Draw both distributions on ONE shared Axes so they can be compared and
# share a legend. `sns.displot` is figure-level (each call opens its own
# figure), so the axes-level `sns.histplot` is used instead.
# `stat='density'` makes the bar heights match the 'Density' axis label;
# the default stat is a raw count.
_, ax = plt.subplots()

# Distribution of the opening price.
sns.histplot(Prgoo['Open'], kde=True, stat='density', linewidth=5,
             label='Open', ax=ax)

# Distribution of the closing price.
sns.histplot(Prgoo['Close'], kde=True, stat='density', linewidth=3,
             label='Close', ax=ax)

# Each histplot call relabels the x axis with its own column name; use a
# neutral label because the axis is shared by both columns.
ax.set_xlabel('Price')

# Label the y axis.
ax.set_ylabel('Density')

# Show the legend.
ax.legend()

# Display the plot.
plt.show()
# Import the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Choose the uploaded file.
uploaded_file = 'TCS1.csv'  # Replace with the name of the uploaded file

# Read the file, using the 'Date' column as the row index.
Prgoo = pd.read_csv(uploaded_file, sep=',', index_col='Date')

# Handle missing values: drop every row that contains a NaN in any column.
Prgoo.dropna(inplace=True)

# 'Close' is the value to predict (dependent variable, y); 'Open' is the
# predictor (independent variable, X).
prices = Prgoo['Close'].tolist()
initial = Prgoo['Open'].tolist()

# Reshape each list into an (n, 1) column array: one row per observation,
# one column for the single feature/target.
prices = np.reshape(prices, (len(prices), 1))
initial = np.reshape(initial, (len(initial), 1))

# Split into training (80%) and test (20%) sets. The outputs come back in
# the order of the inputs: (prices train/test, initial train/test).
# random_state is fixed so the split is reproducible.
prices_train, prices_test, initial_train, initial_test = train_test_split(
    prices, initial, test_size=0.2, random_state=0
)

# Initialize the linear regression model.
regressor = LinearRegression()

# Train the model: predict Close (y) from Open (X).
regressor.fit(initial_train, prices_train)

# Predict closing prices for the held-out test set.
prices_pred = regressor.predict(initial_test)

# Report R-squared on the test set.
r2 = r2_score(prices_test, prices_pred)
print(f'Test-set R2 score: {r2}')
# Visualize the fitted regression line on the training set.
# Red points are the observed (Open, Close) pairs.
plt.scatter(initial_train, prices_train, color='red', label='Actual Price')
# Blue line is the model's prediction for the same Open values.
plt.plot(
    initial_train,
    regressor.predict(initial_train),
    color='blue',
    linewidth=3,
    label='Predicted Price',
)
plt.title('Linear Regression price | Open vs. Close')
# x is the Open price (feature), y is the Close price (target).
plt.xlabel('Open Price')
plt.ylabel('Close Price')
plt.legend()
plt.show()

# The test-set R-squared is printed once, right after prediction; it is
# not recomputed and re-printed here.

# Visualize the same fitted line against the held-out test set.
plt.scatter(initial_test, prices_test, color='red')
plt.plot(initial_test, prices_pred, color='blue')
# Keep the test-set title so this figure can be told apart from the
# training-set figure above (it used to be overwritten immediately).
plt.title('Open vs Close (Test set)')
plt.xlabel('Open Price')
plt.ylabel('Close Price')
plt.show()
# Evaluate the model on the test set.
r2 = r2_score(prices_test, prices_pred)
print(f'R-squared value: {r2}')

# Visualize actual vs. predicted prices.
plt.figure(figsize=(8, 6))

# Scatter plot: one point per test observation (x = actual, y = predicted).
plt.scatter(prices_test, prices_pred, color='blue')

# Diagonal y = x reference line: a perfect prediction lies exactly on it.
plt.plot(prices_test, prices_test, color='red', linewidth=2,
         label='Actual Price')

plt.title('Actual vs Predicted Prices')
plt.xlabel('Actual Price')
plt.ylabel('Predicted Price')
plt.legend()
plt.grid(True)
plt.show()