MACHINE LEARNING College Program
import pandas as pd
# Toy corpus: every message is kept next to its spam / not-spam label.
labelled_emails = [
    ('Free money!!!!', 'spam'),
    ('Hi Bob, how about a game of golf tomorrow?', 'not spam'),
    ('Limited time offer, buy now!', 'spam'),
    ('Are you available for a meeting tomorrow?', 'not spam'),
    ('Congratulations, you have won a lottery!!', 'spam'),
]

# Turn the (text, label) pairs into the column-oriented dict used to
# construct the DataFrame.
data = {
    'email': [text for text, _ in labelled_emails],
    'label': [tag for _, tag in labelled_emails],
}

df = pd.DataFrame(data)
print(df)
Output :
email label
0 Free money!!!! spam
1 Hi Bob, how about a game of golf tomorrow? not spam
2 Limited time offer, buy now! spam
3 Are you available for a meeting tomorrow? not spam
4 Congratulations, you have won a lottery!! spam
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
# Convert the email text to lowercase in place.
df['email'] = df['email'].str.lower()

# Split the data into training and testing sets: 20% of the rows are held
# out, and the fixed random_state makes the split repeatable.
emails = df['email']
labels = df['label']
X_train, X_test, y_train, y_test = train_test_split(
    emails,
    labels,
    test_size=0.2,
    random_state=42,
)

# Vectorize the text: the vocabulary is learned from the training emails
# only and then reused to encode the test emails.
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train the model (fit() returns the fitted estimator itself).
model = MultinomialNB().fit(X_train_vec, y_train)

# Evaluate on the held-out emails. zero_division=0 reports 0 instead of
# warning when a class has no predicted or no true samples.
y_pred = model.predict(X_test_vec)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=0)

print(f'Accuracy: {accuracy}', 'Classification Report:', report, sep='\n')
Output:
Accuracy: 0.0
Classification Report:
precision recall f1-score support
not spam 0.00 0.00 0.00 1.0
spam 0.00 0.00 0.00 0.0
accuracy 0.00 1.0
macro avg 0.00 0.00 0.00 1.0
weighted avg 0.00 0.00 0.00 1.0
Practical 2:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
# NOTE(review): the dataset creation, train/test split and model fitting for
# this practical are not part of this listing. X_test, y_test and y_pred are
# presumably produced by a LinearRegression fitted on number of bedrooms
# versus house price (going by the axis labels below) -- TODO restore them.

# Evaluate the predictions. mae and mse have to be computed here: they are
# printed below (and mse feeds rmse) but are not assigned anywhere earlier.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of MSE
r2 = r2_score(y_test, y_pred)
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R2): {r2}")

# Visualize the results: actual test targets as points, model predictions
# drawn as a line over the same X_test values.
plt.scatter(X_test, y_test, color='blue', label='Actual Prices')
plt.plot(X_test, y_pred, color='red', linewidth=2, label='Predicted Prices')
plt.xlabel('Number of Bedrooms')
plt.ylabel('House Price')
plt.title('Bedrooms vs House Price Prediction')
plt.legend()
plt.show()
Output:
Mean Absolute Error (MAE): 16019.417475728143
Mean Squared Error (MSE): 272297695.35300165
Root Mean Squared Error (RMSE): 16501.445250431905
R-squared (R2): 0.9369498824074
Practical 3
# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
# Load the Iris dataset and pull out the feature matrix and target vector
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Standardize the features: the scaler is fitted on the training split and
# the same transformation is then applied to the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train a Logistic Regression model
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# Predict the classes of the held-out samples
y_pred = model.predict(X_test)

# Performance metrics; precision, recall and F1 all use weighted averaging
weighted = {'average': 'weighted'}
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, **weighted)
recall = recall_score(y_test, y_pred, **weighted)
f1 = f1_score(y_test, y_pred, **weighted)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(
    y_test, y_pred, target_names=iris.target_names
)

# Print the scalar metrics, each rounded to two decimals
for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
):
    print(f"{metric_name}: {metric_value:.2f}")

# Print the confusion matrix and the per-class report
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)
Output:
(Output omitted: the author did not record the output for this practical.)
Logistic Regression using Sklearn
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.datasets import load_breast_cancer
# Load the breast-cancer dataset and separate features from the target
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 1. Linear Regression for Classification
# Fit an ordinary linear regression on the 0/1 target
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

# The regression output is continuous; threshold it at 0.5 to get class labels
y_pred_linear = linear_model.predict(X_test)
y_pred_linear_class = (y_pred_linear >= 0.5).astype(int)

# Evaluate Linear Regression model
linear_accuracy = accuracy_score(y_test, y_pred_linear_class)
linear_confusion = confusion_matrix(y_test, y_pred_linear_class)
linear_report = classification_report(y_test, y_pred_linear_class)
print("Linear Regression for Classification:")
print(f"Accuracy: {linear_accuracy}")
print(f"Confusion Matrix:\n{linear_confusion}")
print(f"Classification Report:\n{linear_report}\n")

# 2. Logistic Regression for Classification
# Increase max_iter if convergence warnings occur
logistic_model = LogisticRegression(max_iter=10000)
logistic_model.fit(X_train, y_train)

# Predict using Logistic Regression
y_pred_logistic = logistic_model.predict(X_test)

# Evaluate Logistic Regression model
logistic_accuracy = accuracy_score(y_test, y_pred_logistic)
logistic_confusion = confusion_matrix(y_test, y_pred_logistic)
logistic_report = classification_report(y_test, y_pred_logistic)
print("Logistic Regression for Classification:")
print(f"Accuracy: {logistic_accuracy}")
print(f"Confusion Matrix:\n{logistic_confusion}")
print(f"Classification Report:\n{logistic_report}")
Output:
Linear Regression for Classification:
Accuracy: 0.956140350877193
Confusion Matrix:
[[39 4]
[ 1 70]]
Classification Report:
precision recall f1-score support
0 0.97 0.91 0.94 43
1 0.95 0.99 0.97 71
accuracy 0.96 114
macro avg 0.96 0.95 0.95 114
weighted avg 0.96 0.96 0.96 114
Logistic Regression for Classification:
Accuracy: 0.956140350877193
Confusion Matrix:
[[39 4]
[ 1 70]]
Classification Report:
precision recall f1-score support
0 0.97 0.91 0.94 43
1 0.95 0.99 0.97 71
accuracy 0.96 114
macro avg 0.96 0.95 0.95 114
weighted avg 0.96 0.96 0.96 114
Subscribe to my newsletter
Read articles from Tarun Joshi directly inside your inbox. Subscribe to the newsletter, and don't miss out.
Written by