import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, PolynomialFeatures
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error, r2_score
from imblearn.over_sampling import SMOTE
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from dmba import regressionSummary
from dmba import backward_elimination, stepwise_selection
from dmba import adjusted_r2_score, AIC_score
import statsmodels.formula.api as sm
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.tree import DecisionTreeClassifier, plot_tree
%matplotlib inline
from utils import *
import warnings
# Descriptive column names that replace the header row of the CSV file.
# They are listed in the same order as the columns of the file.
col_names = [
"SerialNumber",
"Gender",
"LowerSecondarySchollGrade%",
"LowerSecondarySchollBoard",
"HigherSecondarySchollGrade%",
"HigherSecondarySchollBoard",
"HigherSecondarySchollSpecialization",
"DegreeGrade%",
"DegreeSpecialization",
"WorkExperience",
"EmployabilityTestGrade%",
"MBASpecialization",
"MBAGrade%",
"PlacementStatus",
"Salary"
]
# header=0 consumes the file's own header line so that `names` overrides it.
data = pd.read_csv("Placement_Data_Full_Class.csv", names=col_names, header=0)
# Preview the first ten rows.
data.head(10)
SerialNumber | Gender | LowerSecondarySchollGrade% | LowerSecondarySchollBoard | HigherSecondarySchollGrade% | HigherSecondarySchollBoard | HigherSecondarySchollSpecialization | DegreeGrade% | DegreeSpecialization | WorkExperience | EmployabilityTestGrade% | MBASpecialization | MBAGrade% | PlacementStatus | Salary | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | M | 67.00 | Others | 91.00 | Others | Commerce | 58.00 | Sci&Tech | No | 55.00 | Mkt&HR | 58.80 | Placed | 270000.0 |
1 | 2 | M | 79.33 | Central | 78.33 | Others | Science | 77.48 | Sci&Tech | Yes | 86.50 | Mkt&Fin | 66.28 | Placed | 200000.0 |
2 | 3 | M | 65.00 | Central | 68.00 | Central | Arts | 64.00 | Comm&Mgmt | No | 75.00 | Mkt&Fin | 57.80 | Placed | 250000.0 |
3 | 4 | M | 56.00 | Central | 52.00 | Central | Science | 52.00 | Sci&Tech | No | 66.00 | Mkt&HR | 59.43 | Not Placed | NaN |
4 | 5 | M | 85.80 | Central | 73.60 | Central | Commerce | 73.30 | Comm&Mgmt | No | 96.80 | Mkt&Fin | 55.50 | Placed | 425000.0 |
5 | 6 | M | 55.00 | Others | 49.80 | Others | Science | 67.25 | Sci&Tech | Yes | 55.00 | Mkt&Fin | 51.58 | Not Placed | NaN |
6 | 7 | F | 46.00 | Others | 49.20 | Others | Commerce | 79.00 | Comm&Mgmt | No | 74.28 | Mkt&Fin | 53.29 | Not Placed | NaN |
7 | 8 | M | 82.00 | Central | 64.00 | Central | Science | 66.00 | Sci&Tech | Yes | 67.00 | Mkt&Fin | 62.14 | Placed | 252000.0 |
8 | 9 | M | 73.00 | Central | 79.00 | Central | Commerce | 72.00 | Comm&Mgmt | No | 91.34 | Mkt&Fin | 61.29 | Placed | 231000.0 |
9 | 10 | M | 58.00 | Central | 70.00 | Central | Commerce | 61.00 | Comm&Mgmt | No | 54.00 | Mkt&Fin | 52.21 | Not Placed | NaN |
The first thing to notice here is that when the status is "Not Placed", the salary is NaN. Just a reminder not to treat these values as missing data. It will be necessary to check whether anyone "Placed" has a NaN salary. Some encoding will be necessary for the categorical data.
The first column seems to be just an ID. It will be dropped.
Let's take a better look at the features information
data["Gender"].value_counts()
M 139 F 76 Name: Gender, dtype: int64
data["LowerSecondarySchollBoard"].value_counts()
Central 116 Others 99 Name: LowerSecondarySchollBoard, dtype: int64
data["HigherSecondarySchollBoard"].value_counts()
Others 131 Central 84 Name: HigherSecondarySchollBoard, dtype: int64
data["HigherSecondarySchollSpecialization"].value_counts()
Commerce 113 Science 91 Arts 11 Name: HigherSecondarySchollSpecialization, dtype: int64
data["DegreeSpecialization"].value_counts()
Comm&Mgmt 145 Sci&Tech 59 Others 11 Name: DegreeSpecialization, dtype: int64
data["WorkExperience"].value_counts()
No 141 Yes 74 Name: WorkExperience, dtype: int64
data["PlacementStatus"].value_counts()
Placed 148 Not Placed 67 Name: PlacementStatus, dtype: int64
data.describe()
SerialNumber | LowerSecondarySchollGrade% | HigherSecondarySchollGrade% | DegreeGrade% | EmployabilityTestGrade% | MBAGrade% | Salary | |
---|---|---|---|---|---|---|---|
count | 215.000000 | 215.000000 | 215.000000 | 215.000000 | 215.000000 | 215.000000 | 148.000000 |
mean | 108.000000 | 67.303395 | 66.333163 | 66.370186 | 72.100558 | 62.278186 | 288655.405405 |
std | 62.209324 | 10.827205 | 10.897509 | 7.358743 | 13.275956 | 5.833385 | 93457.452420 |
min | 1.000000 | 40.890000 | 37.000000 | 50.000000 | 50.000000 | 51.210000 | 200000.000000 |
25% | 54.500000 | 60.600000 | 60.900000 | 61.000000 | 60.000000 | 57.945000 | 240000.000000 |
50% | 108.000000 | 67.000000 | 65.000000 | 66.000000 | 71.000000 | 62.000000 | 265000.000000 |
75% | 161.500000 | 75.700000 | 73.000000 | 72.000000 | 83.500000 | 66.255000 | 300000.000000 |
max | 215.000000 | 89.400000 | 97.700000 | 91.000000 | 98.000000 | 77.890000 | 940000.000000 |
data.isnull().sum()
SerialNumber 0 Gender 0 LowerSecondarySchollGrade% 0 LowerSecondarySchollBoard 0 HigherSecondarySchollGrade% 0 HigherSecondarySchollBoard 0 HigherSecondarySchollSpecialization 0 DegreeGrade% 0 DegreeSpecialization 0 WorkExperience 0 EmployabilityTestGrade% 0 MBASpecialization 0 MBAGrade% 0 PlacementStatus 0 Salary 67 dtype: int64
import os
# Print the full path of every file found below the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for entry in names:
        print(os.path.join(root, entry))
import seaborn as sns
from matplotlib import pyplot as plt
# Second copy of the data set that keeps the original column names of the CSV.
dataset = pd.read_csv('Placement_Data_Full_Class.csv')
sns.set(style="whitegrid")
# `distplot` is deprecated; `histplot` with a KDE overlay on a density scale is
# its documented replacement. Unplaced students have no salary, so drop NaN.
sns.histplot(dataset['salary'].dropna(), kde=True, stat="density")
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
<AxesSubplot:xlabel='salary', ylabel='Density'>
# Students who were not placed have no salary (NaN); store it as 0 instead.
dataset['salary']=dataset['salary'].fillna(0)
# SerialNumber appears to be only a row identifier, so remove it in place.
data.drop("SerialNumber", axis=1, inplace=True)
# Bar chart of how many students were placed versus not placed.
plot = sns.countplot(x="PlacementStatus", data=data)
plt.title("Placement Distribution")
plt.xlabel("Placement Status")
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("images", exist_ok=True)
plot.get_figure().savefig("images/PlacementStatus.png")
# Look the shares up by label: integer keys on a label-indexed Series are
# treated as positions, which is deprecated and depends on the sort order.
placement_share = data["PlacementStatus"].value_counts(normalize=True) * 100
print('Placed', round(placement_share["Placed"], 2), '% of the dataset')
print('Not placed', round(placement_share["Not Placed"], 2), '% of the dataset')
Placed 68.84 % of the dataset Not placed 31.16 % of the dataset
data[data["PlacementStatus"] == "Not Placed"]["PlacementStatus"].value_counts()
Not Placed 67 Name: PlacementStatus, dtype: int64
Convert the salary from rupees to US dollars (1 rupee = $0.013).
# Use the serial number as the row index instead of keeping it as a column.
dataset = dataset.set_index('sl_no')
# Scale the salary column by the rupee-to-dollar factor.
dataset['salary'] *= 0.013
dataset.head(10)
gender | ssc_p | ssc_b | hsc_p | hsc_b | hsc_s | degree_p | degree_t | workex | etest_p | specialisation | mba_p | status | salary | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
sl_no | ||||||||||||||
1 | M | 67.00 | Others | 91.00 | Others | Commerce | 58.00 | Sci&Tech | No | 55.00 | Mkt&HR | 58.80 | Placed | 3510.0 |
2 | M | 79.33 | Central | 78.33 | Others | Science | 77.48 | Sci&Tech | Yes | 86.50 | Mkt&Fin | 66.28 | Placed | 2600.0 |
3 | M | 65.00 | Central | 68.00 | Central | Arts | 64.00 | Comm&Mgmt | No | 75.00 | Mkt&Fin | 57.80 | Placed | 3250.0 |
4 | M | 56.00 | Central | 52.00 | Central | Science | 52.00 | Sci&Tech | No | 66.00 | Mkt&HR | 59.43 | Not Placed | 0.0 |
5 | M | 85.80 | Central | 73.60 | Central | Commerce | 73.30 | Comm&Mgmt | No | 96.80 | Mkt&Fin | 55.50 | Placed | 5525.0 |
6 | M | 55.00 | Others | 49.80 | Others | Science | 67.25 | Sci&Tech | Yes | 55.00 | Mkt&Fin | 51.58 | Not Placed | 0.0 |
7 | F | 46.00 | Others | 49.20 | Others | Commerce | 79.00 | Comm&Mgmt | No | 74.28 | Mkt&Fin | 53.29 | Not Placed | 0.0 |
8 | M | 82.00 | Central | 64.00 | Central | Science | 66.00 | Sci&Tech | Yes | 67.00 | Mkt&Fin | 62.14 | Placed | 3276.0 |
9 | M | 73.00 | Central | 79.00 | Central | Commerce | 72.00 | Comm&Mgmt | No | 91.34 | Mkt&Fin | 61.29 | Placed | 3003.0 |
10 | M | 58.00 | Central | 70.00 | Central | Commerce | 61.00 | Comm&Mgmt | No | 54.00 | Mkt&Fin | 52.21 | Not Placed | 0.0 |
sns.barplot(x = dataset['gender'],y = dataset['salary'])
<AxesSubplot:xlabel='gender', ylabel='salary'>
# One point per student: salary grouped by gender.
ax = sns.swarmplot(data=dataset, x='gender', y='salary')
ax.set_xlabel('Gender')
ax.set_ylabel('Salary in $')
ax.set_title('Swarmplot of Gender and Salary')
plt.show()
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 12.2% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. warnings.warn(msg, UserWarning)
# Share of each gender, looked up by label rather than by position so the
# result does not depend on which category happens to be the most frequent.
gender_share = data["Gender"].value_counts(normalize=True) * 100
print('Men', round(gender_share["M"], 2), '% of the dataset')
print('Women', round(gender_share["F"], 2), '% of the dataset')
Men 64.65 % of the dataset Women 35.35 % of the dataset
sns.barplot(x = dataset['gender'],y = dataset['ssc_p'])
<AxesSubplot:xlabel='gender', ylabel='ssc_p'>
sns.barplot(x = dataset['gender'],y = dataset['hsc_p'])
<AxesSubplot:xlabel='gender', ylabel='hsc_p'>
sns.barplot(x = dataset['gender'],y = dataset['degree_p'])
<AxesSubplot:xlabel='gender', ylabel='degree_p'>
sns.countplot(x="DegreeSpecialization", hue="Gender", data=data)
<AxesSubplot:xlabel='DegreeSpecialization', ylabel='count'>
sns.barplot(x="DegreeSpecialization", y="Salary", hue="Gender", data=data)
<AxesSubplot:xlabel='DegreeSpecialization', ylabel='Salary'>
sns.barplot(x = dataset['gender'],y = dataset['mba_p'])
<AxesSubplot:xlabel='gender', ylabel='mba_p'>
MBA Specialisation
# MBASpecialization
sns.countplot(x="MBASpecialization", hue="Gender", data=data)
<AxesSubplot:xlabel='MBASpecialization', ylabel='count'>
sns.barplot(x="MBASpecialization", y="Salary", hue="Gender", data=data)
<AxesSubplot:xlabel='MBASpecialization', ylabel='Salary'>
# Horizontal box plot of salary for each gender.
sns.boxplot(x=dataset['salary'], y=dataset['gender'], saturation=1)
# Rounded mean / median salary per gender. Unplaced students were given a
# salary of 0 earlier, so they are included in these statistics.
salary_by_gender = dataset['salary'].groupby(dataset['gender'])
mean_of_both = round(salary_by_gender.mean())
median_both = round(salary_by_gender.median())
# Index by the group labels: the previous [1] / [0] lookups relied on the
# deprecated positional fallback and on 'F' sorting before 'M'.
print('The mean salary of Male is {} while Female is {}'.format(mean_of_both['M'], mean_of_both['F']))
print('The median salary of Male is {} while Female is {}'.format(median_both['M'], median_both['F']))
plt.show()
The mean salary of Male is 2796.0 while Female is 2195.0 The median salary of Male is 3250.0 while Female is 2808.0
# Reset seaborn's default theme, then plot the salary histogram with 25 bins.
sns.set()
ax = plt.gca()
ax.hist(dataset['salary'], bins=25)
ax.set_xlabel('Salary in $')
ax.set_ylabel('Count')
ax.set_title('Salary Histogram')
plt.show()
# One point per student: salary grouped by secondary-education board.
ax = sns.swarmplot(data=dataset, x='ssc_b', y='salary')
ax.set_xlabel('Board of Secondary Education')
ax.set_ylabel('Salary in $')
ax.set_title('Swarmplot of Board of Secondary Education and Salary')
plt.show()
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 11.2% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. warnings.warn(msg, UserWarning)
# Histogram (25 bins) of the secondary-education percentage.
ax = plt.gca()
ax.hist(dataset['ssc_p'], bins=25)
ax.set_xlabel('Secondary Education Percentage')
ax.set_ylabel('Count')
ax.set_title('Secondary Education Percentage Histogram')
plt.show()
sns.boxplot(x = dataset['status'], y = dataset['ssc_p'])
<AxesSubplot:xlabel='status', ylabel='ssc_p'>
Swarmplot of Higher Education and Salary
# One point per student: salary grouped by higher-secondary board.
ax = sns.swarmplot(data=dataset, x='hsc_b', y='salary')
ax.set_xlabel('Board of Higher Secondary Education')
ax.set_ylabel('Salary in $')
ax.set_title('Swarmplot of Board of Higher Secondary Education and Salary')
plt.show()
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 11.5% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. warnings.warn(msg, UserWarning)
# One point per student: salary grouped by higher-secondary specialization.
ax = sns.swarmplot(data=dataset, x='hsc_s', y='salary')
ax.set_xlabel('Specialization in Higher Secondary Education')
ax.set_ylabel('Salary in $')
# The title used to read "Board of Specialization in Hihger ...": a copy-paste
# leftover plus a typo. This plot is about the specialization, not the board.
ax.set_title('Swarmplot of Specialization in Higher Secondary Education and Salary')
plt.show()
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 19.5% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. warnings.warn(msg, UserWarning) C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 16.5% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. warnings.warn(msg, UserWarning)
# Histogram (25 bins) of the higher-secondary percentage.
ax = plt.gca()
ax.hist(dataset['hsc_p'], bins=25)
ax.set_xlabel('Higher Secondary Education Percentage')
ax.set_ylabel('Count')
ax.set_title('Higher Secondary Education Percentage Histogram')
plt.show()
sns.boxplot(x = dataset['status'], y = dataset['hsc_p'])
<AxesSubplot:xlabel='status', ylabel='hsc_p'>
Swarmplot Of Undergraduate Education field and salary
# One point per student: salary grouped by undergraduate degree field.
ax = sns.swarmplot(data=dataset, x='degree_t', y='salary')
ax.set_xlabel('Undergraduate Degree Field')
ax.set_ylabel('salary in $')
ax.set_title('Swarmplot of Undergraduate Degree Field and Salary')
plt.show()
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 31.7% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. warnings.warn(msg, UserWarning)
# Histogram (25 bins) of the undergraduate degree percentage.
ax = plt.gca()
ax.hist(dataset['degree_p'], bins=25)
ax.set_xlabel('Undergraduate Degree Percentage')
ax.set_ylabel('Count')
ax.set_title('Undergraduate Degree Percentage Histogram')
plt.show()
sns.boxplot(x = dataset['status'], y = dataset['degree_p'])
<AxesSubplot:xlabel='status', ylabel='degree_p'>
Swarmplot of Work Experience and Salary
# One point per student: salary grouped by prior work experience.
ax = sns.swarmplot(data=dataset, x='workex', y='salary')
ax.set_xlabel('Work Experience')
ax.set_ylabel('Salary in $')
ax.set_title('Swarmplot of Work Experience and Salary')
plt.show()
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 22.7% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. warnings.warn(msg, UserWarning)
Swarmplot Of Post graduation and salary
# One point per student: salary grouped by MBA specialization.
ax = sns.swarmplot(data=dataset, x='specialisation', y='salary')
ax.set_xlabel('MBA Specialization')
ax.set_ylabel('Salary in $')
ax.set_title('Swarmplot of MBA Specialization and Salary')
plt.show()
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 17.9% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot. warnings.warn(msg, UserWarning)
# Histogram (25 bins) of the MBA percentage.
ax = plt.gca()
ax.hist(dataset['mba_p'], bins=25)
ax.set_xlabel('MBA Percentage')
ax.set_ylabel('Count')
ax.set_title('MBA Percentage Histogram')
plt.show()
sns.boxplot(x = dataset['status'], y = dataset['mba_p'])
<AxesSubplot:xlabel='status', ylabel='mba_p'>
# Histogram (25 bins) of the employability-test percentage.
ax = plt.gca()
ax.hist(dataset['etest_p'], bins=25)
ax.set_xlabel('Employment Test Percentage')
ax.set_ylabel('Count')
ax.set_title('Employment Test Percentage Histogram')
plt.show()
sns.jointplot(x = dataset['ssc_p'], y = dataset['salary'], kind='hex')
<seaborn.axisgrid.JointGrid at 0x1aa9b18afa0>
sns.jointplot(x = dataset['hsc_p'], y = dataset['salary'], kind='hex')
<seaborn.axisgrid.JointGrid at 0x1aa9af1ac70>
sns.jointplot(x = dataset['degree_p'], y = dataset['salary'], kind='hex')
<seaborn.axisgrid.JointGrid at 0x1aa9b319b50>
sns.jointplot(x = dataset['mba_p'], y = dataset['salary'], kind='hex')
<seaborn.axisgrid.JointGrid at 0x1aa99b93c40>
# Salary distribution per degree field, with the individual students drawn on
# top and coloured by placement status.
sns.violinplot(data=dataset, x="degree_t", y="salary")
sns.stripplot(data=dataset, x="degree_t", y="salary", hue="status")
plt.show()
# Salary against degree percentage, one line per degree field.
plt.figure(figsize=(19, 8))
# x and y must be passed by keyword: from seaborn 0.12 on, `data` is the only
# valid positional argument, so the positional form no longer works.
sns.lineplot(data=dataset, x="degree_p", y="salary", hue="degree_t")
plt.show()
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation. warnings.warn(
# Employability test grade of every student, split by placement status.
plot = sns.catplot(x="PlacementStatus", y="EmployabilityTestGrade%", data=data)
plt.xlabel("Placement Status")
plt.ylabel("Employability Test Grade (in %)")
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("images", exist_ok=True)
plot.savefig("images/EmployabilityTest.png")
sns.regplot(x="EmployabilityTestGrade%", y="Salary", data=data)
plt.xlabel("Employability Test grade")
Text(0.5, 0, 'Employability Test grade')
sns.regplot(x="MBAGrade%", y="Salary", data=data)
plt.xlabel("MBA grades")
Text(0.5, 0, 'MBA grades')
sns.barplot(x="WorkExperience", y="Salary", data=data)
<AxesSubplot:xlabel='WorkExperience', ylabel='Salary'>
# Mean salary of students with and without work experience (NaN salaries of
# unplaced students are skipped by the mean).
has_experience = data["WorkExperience"] == "Yes"
lacks_experience = data["WorkExperience"] == "No"
salary_with_experience = data.loc[has_experience, "Salary"].mean()
salary_no_experience = data.loc[lacks_experience, "Salary"].mean()
print("Salary with work experience: ", salary_with_experience)
print("Salary with no work experience: ", salary_no_experience)
# Relative advantage of having experience, in percent.
salary_ratio = salary_with_experience / salary_no_experience
print("Percentual difference: ", 100*(salary_ratio - 1))
Salary with work experience: 303265.625 Salary with no work experience: 277523.8095238095 Percentual difference: 9.275534059711731
sns.countplot(x="PlacementStatus", hue="WorkExperience", data=data)
plt.xlabel("Placement status")
Text(0.5, 0, 'Placement status')
# Boolean "was placed" flags, split by whether the student had work experience.
was_placed = data["PlacementStatus"] == "Placed"
placement_with_experience = was_placed[data["WorkExperience"] == "Yes"]
placement_no_experience = was_placed[data["WorkExperience"] == "No"]
# The mean of a boolean Series is the fraction of True values: the placement rate.
rate_with = placement_with_experience.mean()
rate_without = placement_no_experience.mean()
print("Placement with work experience: ", rate_with)
print("Placement with no work experience: ", rate_without)
print("Percentual difference: ", 100*(rate_with / rate_without - 1))
Placement with work experience: 0.8648648648648649 Placement with no work experience: 0.5957446808510638 Percentual difference: 45.173745173745196
We will label-encode (no new columns) the categorical features that have only two unique values, e.g. Gender M/F, and one-hot encode every other categorical column (columns with more than two unique values).
def label_encoding(data):
    """Return the integer codes a fresh LabelEncoder assigns to ``data``."""
    encoder = LabelEncoder()
    return encoder.fit_transform(data)
def one_hot_encoding(data):
    """Return ``data`` with its categorical columns expanded by ``pd.get_dummies``."""
    encoded = pd.get_dummies(data)
    return encoded
# Columns that are label-encoded in place with label_encoding().
binary_columns = [
    "Gender",
    "LowerSecondarySchollBoard",
    "HigherSecondarySchollBoard",
    "WorkExperience",
    "MBASpecialization",
    "PlacementStatus",
]
for column in binary_columns:
    data[column] = label_encoding(data[column])
# The remaining categorical columns are expanded into indicator columns.
data = one_hot_encoding(data)
data.head(5)
Gender | LowerSecondarySchollGrade% | LowerSecondarySchollBoard | HigherSecondarySchollGrade% | HigherSecondarySchollBoard | DegreeGrade% | WorkExperience | EmployabilityTestGrade% | MBASpecialization | MBAGrade% | PlacementStatus | Salary | HigherSecondarySchollSpecialization_Arts | HigherSecondarySchollSpecialization_Commerce | HigherSecondarySchollSpecialization_Science | DegreeSpecialization_Comm&Mgmt | DegreeSpecialization_Others | DegreeSpecialization_Sci&Tech | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 67.00 | 1 | 91.00 | 1 | 58.00 | 0 | 55.0 | 1 | 58.80 | 1 | 270000.0 | 0 | 1 | 0 | 0 | 0 | 1 |
1 | 1 | 79.33 | 0 | 78.33 | 1 | 77.48 | 1 | 86.5 | 0 | 66.28 | 1 | 200000.0 | 0 | 0 | 1 | 0 | 0 | 1 |
2 | 1 | 65.00 | 0 | 68.00 | 0 | 64.00 | 0 | 75.0 | 0 | 57.80 | 1 | 250000.0 | 1 | 0 | 0 | 1 | 0 | 0 |
3 | 1 | 56.00 | 0 | 52.00 | 0 | 52.00 | 0 | 66.0 | 1 | 59.43 | 0 | NaN | 0 | 0 | 1 | 0 | 0 | 1 |
4 | 1 | 85.80 | 0 | 73.60 | 0 | 73.30 | 0 | 96.8 | 0 | 55.50 | 1 | 425000.0 | 0 | 1 | 0 | 1 | 0 | 0 |
# Correlation of every column with Salary, computed only on rows whose
# PlacementStatus is 1 and sorted in ascending order.
salary_corr = data[data["PlacementStatus"] == 1].corr()["Salary"].sort_values()
# Display correlations
salary_corr # corr() returns NaN if all values are the same, which is the case with PlacementStatus
DegreeSpecialization_Comm&Mgmt -0.160322 MBASpecialization -0.146576 HigherSecondarySchollSpecialization_Arts -0.088253 DegreeGrade% -0.019272 DegreeSpecialization_Others -0.016573 HigherSecondarySchollSpecialization_Commerce -0.014219 HigherSecondarySchollBoard -0.007549 LowerSecondarySchollBoard 0.005539 LowerSecondarySchollGrade% 0.035330 HigherSecondarySchollSpecialization_Science 0.049547 HigherSecondarySchollGrade% 0.076819 WorkExperience 0.136920 Gender 0.158912 DegreeSpecialization_Sci&Tech 0.172492 MBAGrade% 0.175013 EmployabilityTestGrade% 0.178307 Salary 1.000000 PlacementStatus NaN Name: Salary, dtype: float64
plt.figure(figsize=(20,10))
sns.heatmap(data.corr().abs(), annot=True)
<AxesSubplot:>
# Correlation of every column with PlacementStatus, sorted in ascending order.
correlation_matrix = data.corr()
placement_corr = correlation_matrix["PlacementStatus"].sort_values()
# Display correlations
placement_corr
MBASpecialization -0.250655 DegreeSpecialization_Others -0.117232 HigherSecondarySchollSpecialization_Arts -0.071653 HigherSecondarySchollSpecialization_Science 0.007279 DegreeSpecialization_Sci&Tech 0.008688 HigherSecondarySchollBoard 0.016945 HigherSecondarySchollSpecialization_Commerce 0.024414 LowerSecondarySchollBoard 0.037297 DegreeSpecialization_Comm&Mgmt 0.046849 MBAGrade% 0.076922 Gender 0.090670 EmployabilityTestGrade% 0.127639 WorkExperience 0.276060 DegreeGrade% 0.479861 HigherSecondarySchollGrade% 0.491228 LowerSecondarySchollGrade% 0.607889 PlacementStatus 1.000000 Salary NaN Name: PlacementStatus, dtype: float64
We already answered this question in the data analysis phase. Things like MBA area of specialization, experience and grades are the most important here.
We will try to predict if someone who just graduated from the MBA is employed.
First, the preprocessing: remove the salary and any other feature that does not seem to be related to the problem. Finally, standardize the data.
placement_corr = data.corr()["PlacementStatus"].sort_values()
# Here we are looking at PlacementStatus, so Salary is not important and we will ignore the NaN
columns_to_drop = [
    "Salary",
    "HigherSecondarySchollSpecialization_Arts",
    "HigherSecondarySchollSpecialization_Science",
    "HigherSecondarySchollSpecialization_Commerce",
    "DegreeSpecialization_Sci&Tech",
    "DegreeSpecialization_Comm&Mgmt",
    "DegreeSpecialization_Others",
    "MBAGrade%",
]
placement_data = data.drop(columns_to_drop, axis=1)
# Target is the placement flag; every remaining column is a feature.
y = placement_data["PlacementStatus"]
X = placement_data.drop(["PlacementStatus"], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
# Standardise both splits with the TRAINING statistics. Scaling the test set
# with its own mean/std puts it on a different scale than the data the model
# was fitted on and leaks information from the held-out rows.
train_mean = X_train.mean()
train_std = X_train.std()
X_train = (X_train - train_mean) / train_std
X_test = (X_test - train_mean) / train_std
X_train.head(5)
Gender | LowerSecondarySchollGrade% | LowerSecondarySchollBoard | HigherSecondarySchollGrade% | HigherSecondarySchollBoard | DegreeGrade% | WorkExperience | EmployabilityTestGrade% | MBASpecialization | |
---|---|---|---|---|---|---|---|---|---|
79 | -1.367791 | 0.162888 | -0.913101 | -0.379024 | -1.237537 | -0.043779 | -0.710761 | 0.192690 | 1.100944 |
25 | -1.367791 | -1.372102 | 1.089495 | -1.042572 | -1.237537 | -2.148106 | 1.399653 | 0.268148 | -0.903605 |
104 | 0.727318 | 0.162888 | -0.913101 | -0.289355 | 0.803870 | -0.176964 | 1.399653 | -1.316460 | 1.100944 |
145 | 0.727318 | 2.069940 | 1.089495 | -0.050837 | 0.803870 | 0.655444 | -0.710761 | -0.033682 | 1.100944 |
189 | -1.367791 | -1.239356 | -0.913101 | 1.019806 | 0.803870 | 0.382414 | -0.710761 | 1.750888 | -0.903605 |
X_test.head(5)
Gender | LowerSecondarySchollGrade% | LowerSecondarySchollBoard | HigherSecondarySchollGrade% | HigherSecondarySchollBoard | DegreeGrade% | WorkExperience | EmployabilityTestGrade% | MBASpecialization | |
---|---|---|---|---|---|---|---|---|---|
200 | 0.812920 | 0.106333 | 0.977008 | -0.855848 | 0.738549 | -0.288051 | -0.812920 | 1.379209 | -0.738549 |
212 | 0.812920 | -0.057945 | 0.977008 | -0.031218 | 0.738549 | 1.040835 | 1.174218 | -0.751182 | -0.738549 |
138 | -1.174218 | 1.174140 | 0.977008 | -0.384631 | 0.738549 | 1.040835 | 1.174218 | 2.009745 | -0.738549 |
176 | -1.174218 | -0.715058 | -0.977008 | -0.855848 | 0.738549 | -1.783049 | -0.812920 | -1.049661 | 1.292461 |
15 | -1.174218 | -0.222223 | -0.977008 | 0.911216 | -1.292461 | 0.376392 | 1.174218 | 0.218873 | -0.738549 |
salary_corr = data[data["PlacementStatus"] == 1].corr()["Salary"].sort_values()
# Display correlations
salary_corr
DegreeSpecialization_Comm&Mgmt -0.160322 MBASpecialization -0.146576 HigherSecondarySchollSpecialization_Arts -0.088253 DegreeGrade% -0.019272 DegreeSpecialization_Others -0.016573 HigherSecondarySchollSpecialization_Commerce -0.014219 HigherSecondarySchollBoard -0.007549 LowerSecondarySchollBoard 0.005539 LowerSecondarySchollGrade% 0.035330 HigherSecondarySchollSpecialization_Science 0.049547 HigherSecondarySchollGrade% 0.076819 WorkExperience 0.136920 Gender 0.158912 DegreeSpecialization_Sci&Tech 0.172492 MBAGrade% 0.175013 EmployabilityTestGrade% 0.178307 Salary 1.000000 PlacementStatus NaN Name: Salary, dtype: float64
# Features kept for the salary regression, plus the Salary target itself.
columns_to_keep = [
    "Salary",
    "EmployabilityTestGrade%",
    "MBAGrade%",
    "DegreeSpecialization_Sci&Tech",
    "DegreeSpecialization_Comm&Mgmt",
    "Gender",
    "WorkExperience",
    "MBASpecialization",
]
salary_data = data[columns_to_keep]
# Only rows with a known salary can be used to train or evaluate the regression.
salary_data = salary_data[salary_data['Salary'].notna()]
y = salary_data["Salary"]
X = salary_data.drop(["Salary"], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
# Standardise both splits with the TRAINING statistics. Scaling the test set
# with its own mean/std puts it on a different scale than the data the model
# was fitted on and leaks information from the held-out rows.
train_mean = X_train.mean()
train_std = X_train.std()
X_train = (X_train - train_mean) / train_std
X_test = (X_test - train_mean) / train_std
X_train.head(10)
EmployabilityTestGrade% | MBAGrade% | DegreeSpecialization_Sci&Tech | DegreeSpecialization_Comm&Mgmt | Gender | WorkExperience | MBASpecialization | |
---|---|---|---|---|---|---|---|
95 | 1.605444 | -0.038434 | -0.606913 | 0.665057 | 0.700475 | 1.098872 | -0.736480 |
27 | -0.380194 | -0.790559 | -0.606913 | 0.665057 | 0.700475 | -0.903182 | 1.347601 |
101 | 0.342908 | -0.327842 | -0.606913 | 0.665057 | 0.700475 | -0.903182 | 1.347601 |
177 | 1.138320 | 1.417020 | -0.606913 | 0.665057 | -1.416869 | 1.098872 | -0.736480 |
47 | 0.342908 | -1.318897 | -0.606913 | 0.665057 | 0.700475 | 1.098872 | -0.736480 |
44 | 1.138320 | 1.230250 | -0.606913 | 0.665057 | -1.416869 | 1.098872 | -0.736480 |
146 | 0.849079 | -1.219623 | -0.606913 | 0.665057 | 0.700475 | -0.903182 | 1.347601 |
56 | -0.380194 | 0.755756 | -0.606913 | 0.665057 | 0.700475 | -0.903182 | -0.736480 |
164 | -1.461954 | 0.526921 | -0.606913 | 0.665057 | -1.416869 | -0.903182 | -0.736480 |
80 | -0.452504 | -0.006465 | -0.606913 | 0.665057 | -1.416869 | 1.098872 | 1.347601 |
y_train.head(10)
95 420000.0 27 265000.0 101 380000.0 177 650000.0 47 204000.0 44 200000.0 146 233000.0 56 240000.0 164 250000.0 80 240000.0 Name: Salary, dtype: float64
def trainRegression(X, Y):
    """Cross-validate linear regression on polynomial expansions of ``X``.

    For polynomial degrees 1, 2 and 3 the features are expanded with
    PolynomialFeatures, a LinearRegression is scored with 10-fold
    cross-validation (R^2), and the mean and standard deviation of the
    fold scores are printed. Nothing is returned.
    """
    for degree in (1, 2, 3):
        label = 'Linear Regression degree' + str(degree)
        expanded = PolynomialFeatures(degree=degree).fit_transform(X)
        scores = cross_val_score(LinearRegression(), expanded, Y, cv=10, scoring='r2')
        print('%s: %.6f (%.6f)' % (label, scores.mean(), scores.std()))
trainRegression(X_train, y_train)
Linear Regression degree1: -0.300547 (0.593319) Linear Regression degree2: -56012702404937373576069120.000000 (167397531279467017331015680.000000) Linear Regression degree3: -1273200841177254913376256.000000 (2553128309561786567229440.000000)
# Fit a plain linear regression on the training split (fit returns the estimator).
model = LinearRegression().fit(X_train, y_train)
# Predict once and reuse the result for both the printout and the score.
predictions = model.predict(X_test)
print(predictions)
print(y_test.values)
print(r2_score(y_test, predictions))
[303555.88575642 302070.63156475 253695.20532424 306526.57108331 291716.43907118 241686.34186756 239362.40691229 280510.03222008 322901.97097687 279396.03079504 319113.87953033 296539.86383578 322601.86041505 275781.6899594 272699.08542453] [350000. 336000. 260000. 350000. 400000. 265000. 250000. 250000. 240000. 200000. 411000. 250000. 400000. 300000. 260000.] 0.23452225966615792
Results are not great, but the amount of data does not help very much
# Columns kept for the "before MBA" analysis. This is a list, not a set:
# pandas >= 2.0 raises TypeError when a DataFrame is indexed with a set, and a
# set would also leave the column order of the selection undefined.
columns_to_keep = [
    "Gender",
    "LowerSecondarySchollGrade%",
    "HigherSecondarySchollGrade%",
    "DegreeGrade%",
    "WorkExperience",
    "MBASpecialization",  # This one is about the MBA but the institution would already know it beforehand
    "HigherSecondarySchollSpecialization_Arts",
    "HigherSecondarySchollSpecialization_Commerce",
    "HigherSecondarySchollSpecialization_Science",
    "DegreeSpecialization_Comm&Mgmt",
    "DegreeSpecialization_Others",
    "DegreeSpecialization_Sci&Tech",
    "PlacementStatus",
]
before_mba_data = data[columns_to_keep]
The knowledge from the data analysis phase can tell us which features are the best.
"Things like MBA area of specialization, experience and grades are the most important here."
placement_corr = before_mba_data.corr()["PlacementStatus"].sort_values()
# Display correlations
placement_corr # Here we are looking at PlacementStatus, so Salary is not important and we will ignore the NaN
MBASpecialization -0.250655 DegreeSpecialization_Others -0.117232 HigherSecondarySchollSpecialization_Arts -0.071653 HigherSecondarySchollSpecialization_Science 0.007279 DegreeSpecialization_Sci&Tech 0.008688 HigherSecondarySchollSpecialization_Commerce 0.024414 DegreeSpecialization_Comm&Mgmt 0.046849 Gender 0.090670 WorkExperience 0.276060 DegreeGrade% 0.479861 HigherSecondarySchollGrade% 0.491228 LowerSecondarySchollGrade% 0.607889 PlacementStatus 1.000000 Name: PlacementStatus, dtype: float64
# Reduced feature selection: the one-hot specialization columns are left out.
# This is a list, not a set: pandas >= 2.0 raises TypeError when a DataFrame is
# indexed with a set, and a set would leave the column order undefined.
columns_to_keep = [
    "Gender",
    "LowerSecondarySchollGrade%",
    "HigherSecondarySchollGrade%",
    "DegreeGrade%",
    "WorkExperience",
    "MBASpecialization",  # This one is about the MBA but the institution would already know it beforehand
    "PlacementStatus",
]
before_mba_data = data[columns_to_keep]
# (short name, estimator) pairs evaluated by runModels, in this order.
models = [
    ('LR', LogisticRegression(solver='liblinear', multi_class='ovr')),
    ('KNN', KNeighborsClassifier()),
    ('TREE', DecisionTreeClassifier()),
    ('SVM', SVC(gamma='auto', probability=True)),
    ('GB', GradientBoostingClassifier()),
    ('RF', RandomForestClassifier()),
]
def runModels(X_train, Y_train, scoring='accuracy'):
    """Cross-validate every estimator in the module-level ``models`` list.

    Each model is scored with shuffled, stratified 5-fold cross-validation
    (seed 42), and one line per model is printed with the mean and standard
    deviation of the fold scores.

    Args:
        X_train: Feature matrix forwarded to ``cross_val_score``.
        Y_train: Target values forwarded to ``cross_val_score``.
        scoring: Scoring name forwarded to ``cross_val_score``.

    Returns:
        None. Results are only printed.
    """
    for label, estimator in models:
        # A new splitter per model; the fixed seed yields the same folds each time.
        splitter = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
        fold_scores = cross_val_score(
            estimator, X_train, Y_train, cv=splitter, scoring=scoring
        )
        summary = '%s: %.6f (%.6f)' % (label, fold_scores.mean(), fold_scores.std())
        print(summary)
# Build the split from before_mba_data so the target really is the binary
# PlacementStatus column. The recorded run reused an X_train / y_train pair
# whose target was multiclass (some classes had a single member), so the
# binary 'f1' scorer failed on every fold and all models reported nan.
# NOTE(review): the 80/20 split and the seed are assumptions — adjust to
# match the rest of the notebook. This rebinds X_train/X_test/y_train/y_test.
X = before_mba_data.drop(columns="PlacementStatus")
y = before_mba_data["PlacementStatus"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)
runModels(X_train, y_train, scoring='f1')
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_split.py:676: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5. warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. 
warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_split.py:676: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5. 
warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\base.py:441: UserWarning: X does not have valid feature names, but KNeighborsClassifier was fitted with feature names warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. 
warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\base.py:441: UserWarning: X does not have valid feature names, but KNeighborsClassifier was fitted with feature names warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. 
warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\base.py:441: UserWarning: X does not have valid feature names, but KNeighborsClassifier was fitted with feature names warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. 
warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\base.py:441: UserWarning: X does not have valid feature names, but KNeighborsClassifier was fitted with feature names warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. 
warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\base.py:441: UserWarning: X does not have valid feature names, but KNeighborsClassifier was fitted with feature names warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. 
warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_split.py:676: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5. warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. 
warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_split.py:676: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5. warnings.warn(
LR: nan (nan) KNN: nan (nan) TREE: nan (nan)
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. 
Details: Traceback (most recent call last): File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score scores = scorer(estimator, X_test, y_test) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__ score = scorer._score(cached_call, estimator, *args, **kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score return self._sign * self._score_func(y_true, y_pred, **self._kwargs) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score return fbeta_score( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score _, _, f, _ = precision_recall_fscore_support( File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label) File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels raise ValueError( ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']. warnings.warn( C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_split.py:676: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5. warnings.warn(
SVM: nan (nan)
# Export this notebook (Campus-placement-data-anal.ipynb) to HTML with nbconvert.
# The leading `!` is an IPython shell escape: the rest of the line is handed to
# the system shell instead of being parsed as Python. Without the `!`, the
# command is read as Python source and fails with "SyntaxError: invalid syntax".
!jupyter nbconvert --to html Campus-placement-data-anal.ipynb
File "C:\Users\PREETI\AppData\Local\Temp/ipykernel_15924/2096042202.py", line 1 jupyter nbconvert --execute --to html notebook.ipynb ^ SyntaxError: invalid syntax