MACHINE LEARNING College Program

Tarun Joshi
Practical 1:

import pandas as pd

# Create a small labelled dataset of spam and non-spam emails
data = {
    'email': [
        'Free money!!!!',
        'Hi Bob, how about a game of golf tomorrow?',
        'Limited time offer, buy now!',
        'Are you available for a meeting tomorrow?',
        'Congratulations, you have won a lottery!!'
    ],
    'label': ['spam', 'not spam', 'spam', 'not spam', 'spam']
}

df = pd.DataFrame(data)
print(df)

Output:

                                         email     label
0                               Free money!!!!      spam
1   Hi Bob, how about a game of golf tomorrow?  not spam
2                 Limited time offer, buy now!      spam
3    Are you available for a meeting tomorrow?  not spam
4   Congratulations, you have won a lottery!!       spam
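Before a classifier can be trained, each email has to be turned into numbers. As a quick peek at what the bag-of-words representation used below looks like, here is a minimal sketch (reusing the df defined above; the variable names are my own):

from sklearn.feature_extraction.text import CountVectorizer

# Fit a bag-of-words vocabulary on the five emails and inspect the count matrix
demo_vectorizer = CountVectorizer()
counts = demo_vectorizer.fit_transform(df['email'].str.lower())
print(demo_vectorizer.get_feature_names_out())  # the learned vocabulary
print(counts.toarray())                         # one row of word counts per email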

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Convert text to lowercase
df['email'] = df['email'].str.lower()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    df['email'], df['label'], test_size=0.2, random_state=42
)

# Vectorize the text data
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train the model
model = MultinomialNB()
model.fit(X_train_vec, y_train)

# Model evaluation
y_pred = model.predict(X_test_vec)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=0)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)

Output:

Accuracy: 0.0
Classification Report:
              precision    recall  f1-score   support

    not spam       0.00      0.00      0.00       1.0
        spam       0.00      0.00      0.00       0.0

    accuracy                           0.00       1.0
   macro avg       0.00      0.00      0.00       1.0
weighted avg       0.00      0.00      0.00       1.0
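With only five emails and test_size=0.2, the test set contains a single message, so one misclassification is enough to drag the accuracy down to 0.0; this is a quirk of the tiny dataset rather than a broken model. The fitted vectorizer and classifier can still score new text. A minimal sketch, using made-up example emails:

# Classify new, unseen emails with the already-fitted vectorizer and model
# (the example messages below are hypothetical, not part of the original dataset)
new_emails = [
    'win a free prize now!!!',
    'can we reschedule the meeting to friday?'
]
new_vec = vectorizer.transform(new_emails)   # reuse the vocabulary learned during training
print(model.predict(new_vec))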

Practical 2:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Example dataset: number of bedrooms vs. house price
# (the original post omitted the data setup; these values are illustrative)
df = pd.DataFrame({
    'bedrooms': [1, 2, 2, 3, 3, 4, 4, 5, 5, 6],
    'price': [150000, 200000, 210000, 250000, 260000, 300000, 320000, 360000, 370000, 400000]
})
X = df[['bedrooms']]
y = df['price']

# Split the data, fit a linear regression model, and predict on the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R2): {r2}")

# Visualize the results
plt.scatter(X_test, y_test, color='blue', label='Actual Prices')
plt.plot(X_test, y_pred, color='red', linewidth=2, label='Predicted Prices')
plt.xlabel('Number of Bedrooms')
plt.ylabel('House Price')
plt.title('Bedrooms vs House Price Prediction')
plt.legend()
plt.show()

Output:

Mean Absolute Error (MAE): 16019.417475728143
Mean Squared Error (MSE): 272297695.35300165
Root Mean Squared Error (RMSE): 16501.445250431905
R-squared (R2): 0.9369498824074
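As a quick sanity check, the same metrics can be recomputed directly from their definitions with NumPy. A minimal sketch, reusing y_test and y_pred from the code above (the variable names are my own):

import numpy as np

y_true = np.asarray(y_test, dtype=float)
y_hat = np.asarray(y_pred, dtype=float)
errors = y_true - y_hat

mae_manual = np.mean(np.abs(errors))        # mean absolute error
mse_manual = np.mean(errors ** 2)           # mean squared error
rmse_manual = np.sqrt(mse_manual)           # root mean squared error
r2_manual = 1 - np.sum(errors ** 2) / np.sum((y_true - y_true.mean()) ** 2)  # R-squared
print(mae_manual, mse_manual, rmse_manual, r2_manual)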

Practical 3:

# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train a Logistic Regression model
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate performance metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred, target_names=iris.target_names)

# Print the results
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)

Output:

Not in the mood right now.
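The Iris dataset is small, so the score from a single train/test split can swing noticeably between runs. A minimal sketch that estimates accuracy with 5-fold cross-validation instead, reusing X, y, StandardScaler and LogisticRegression from the code above:

from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score

# Bundle the scaler and classifier so each fold re-fits the scaler on its own training part
pipe = make_pipeline(StandardScaler(), LogisticRegression(max_iter=200))
scores = cross_val_score(pipe, X, y, cv=5)  # 5-fold cross-validated accuracy
print(f"CV accuracy: {scores.mean():.2f} +/- {scores.std():.2f}")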

Logistic Regression using sklearn

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.datasets import load_breast_cancer
# Load the breast cancer dataset and split it
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 1. Linear Regression for Classification
# Fit a linear regression model and threshold its continuous predictions at 0.5
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred_linear = linear_model.predict(X_test)
y_pred_linear_class = np.where(y_pred_linear >= 0.5, 1, 0)

# Evaluate Linear Regression model
print("Linear Regression for Classification:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_linear_class)}")
print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_pred_linear_class)}")
print(f"Classification Report:\n{classification_report(y_test, y_pred_linear_class)}\n")

# 2. Logistic Regression for Classification
# Initialize the Logistic Regression model
logistic_model = LogisticRegression(max_iter=10000)  # Increase max_iter if convergence warnings occur
logistic_model.fit(X_train, y_train)

# Predict using Logistic Regression
y_pred_logistic = logistic_model.predict(X_test)

# Evaluate Logistic Regression model
print("Logistic Regression for Classification:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_logistic)}")
print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_pred_logistic)}")
print(f"Classification Report:\n{classification_report(y_test, y_pred_logistic)}")

Output:

Linear Regression for Classification:
Accuracy: 0.956140350877193
Confusion Matrix:
[[39  4]
 [ 1 70]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.91      0.94        43
           1       0.95      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

Logistic Regression for Classification:
Accuracy: 0.956140350877193
Confusion Matrix:
[[39  4]
 [ 1 70]]
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.91      0.94        43
           1       0.95      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114
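Although both models reach the same accuracy on this split, they differ in what they actually output: LinearRegression returns an unbounded real-valued score that had to be thresholded at 0.5 by hand, while LogisticRegression exposes calibrated class probabilities directly. A minimal sketch contrasting the two, reusing the fitted models from above:

# Raw linear-regression outputs: real numbers, not probabilities (they can fall outside [0, 1])
print(y_pred_linear[:5])

# Logistic-regression probabilities for class 1: always between 0 and 1
print(logistic_model.predict_proba(X_test)[:5, 1])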