import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, PolynomialFeatures
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, classification_report, mean_squared_error, r2_score
from imblearn.over_sampling import SMOTE
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from dmba import regressionSummary
from dmba import backward_elimination, stepwise_selection
from dmba import adjusted_r2_score, AIC_score
import statsmodels.formula.api as sm
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.tree import DecisionTreeClassifier, plot_tree
%matplotlib inline
from utils import *

import warnings


col_names = [
    "SerialNumber", 
    "Gender", 
    "LowerSecondarySchollGrade%", 
    "LowerSecondarySchollBoard", 
    "HigherSecondarySchollGrade%", 
    "HigherSecondarySchollBoard",
    "HigherSecondarySchollSpecialization",
    "DegreeGrade%",
    "DegreeSpecialization",
    "WorkExperience",
    "EmployabilityTestGrade%",
    "MBASpecialization",
    "MBAGrade%",
    "PlacementStatus",
    "Salary"
]
data = pd.read_csv("Placement_Data_Full_Class.csv", names=col_names, header=0)
data.head(10)


data["Gender"].value_counts()

M    139
F     76
Name: Gender, dtype: int64


data["LowerSecondarySchollBoard"].value_counts()

Central    116
Others      99
Name: LowerSecondarySchollBoard, dtype: int64


data["HigherSecondarySchollBoard"].value_counts()

Others     131
Central     84
Name: HigherSecondarySchollBoard, dtype: int64


data["HigherSecondarySchollSpecialization"].value_counts()

Commerce    113
Science      91
Arts         11
Name: HigherSecondarySchollSpecialization, dtype: int64


data["DegreeSpecialization"].value_counts()

Comm&Mgmt    145
Sci&Tech      59
Others        11
Name: DegreeSpecialization, dtype: int64


data["WorkExperience"].value_counts()

No     141
Yes     74
Name: WorkExperience, dtype: int64


data["PlacementStatus"].value_counts()

Placed        148
Not Placed     67
Name: PlacementStatus, dtype: int64


data.describe()


data.isnull().sum()

SerialNumber                            0
Gender                                  0
LowerSecondarySchollGrade%              0
LowerSecondarySchollBoard               0
HigherSecondarySchollGrade%             0
HigherSecondarySchollBoard              0
HigherSecondarySchollSpecialization     0
DegreeGrade%                            0
DegreeSpecialization                    0
WorkExperience                          0
EmployabilityTestGrade%                 0
MBASpecialization                       0
MBAGrade%                               0
PlacementStatus                         0
Salary                                 67
dtype: int64


import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
import seaborn as sns
from matplotlib import pyplot as plt

dataset = pd.read_csv('Placement_Data_Full_Class.csv')

sns.set(style="whitegrid")
sns.distplot(dataset.salary)

C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)

<AxesSubplot:xlabel='salary', ylabel='Density'>


dataset['salary']=dataset['salary'].fillna(0)
data.drop("SerialNumber", axis=1, inplace=True)


plot = sns.countplot(x="PlacementStatus", data=data)
plt.title("Placement Distribution")
plt.xlabel("Placement Status")
plot.get_figure().savefig("images/PlacementStatus.png")

print('Placed', round(data["PlacementStatus"].value_counts()[0]/len(data) * 100,2), '% of the dataset')
print('Not placed', round(data["PlacementStatus"].value_counts()[1]/len(data) * 100,2), '% of the dataset')

Placed 68.84 % of the dataset
Not placed 31.16 % of the dataset


data[data["PlacementStatus"] == "Not Placed"]["PlacementStatus"].value_counts()

Not Placed    67
Name: PlacementStatus, dtype: int64


dataset.index = dataset['sl_no']
dataset.drop('sl_no', axis=1, inplace=True)

dataset['salary'] = dataset['salary'] * 0.013
dataset.head(10)


sns.barplot(x = dataset['gender'],y = dataset['salary'])

<AxesSubplot:xlabel='gender', ylabel='salary'>


_ = sns.swarmplot(x='gender', y='salary', data=dataset)
_ = plt.xlabel('Gender')
_ = plt.ylabel('Salary in $')
_ = plt.title('Swarmplot of Gender and Salary')
plt.show()

C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 12.2% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
  warnings.warn(msg, UserWarning)


print('Men', round(data["Gender"].value_counts()[0]/len(data) * 100,2), '% of the dataset')
print('Women', round(data["Gender"].value_counts()[1]/len(data) * 100,2), '% of the dataset')

Men 64.65 % of the dataset
Women 35.35 % of the dataset


sns.barplot(x = dataset['gender'],y = dataset['ssc_p'])

<AxesSubplot:xlabel='gender', ylabel='ssc_p'>


sns.barplot(x = dataset['gender'],y = dataset['hsc_p'])

<AxesSubplot:xlabel='gender', ylabel='hsc_p'>


sns.barplot(x = dataset['gender'],y = dataset['degree_p'])

<AxesSubplot:xlabel='gender', ylabel='degree_p'>


sns.countplot(x="DegreeSpecialization", hue="Gender", data=data)

<AxesSubplot:xlabel='DegreeSpecialization', ylabel='count'>


sns.barplot(x="DegreeSpecialization", y="Salary", hue="Gender", data=data)

<AxesSubplot:xlabel='DegreeSpecialization', ylabel='Salary'>


sns.barplot(x = dataset['gender'],y = dataset['mba_p'])

<AxesSubplot:xlabel='gender', ylabel='mba_p'>


# MBASpecialization
sns.countplot(x="MBASpecialization", hue="Gender", data=data)

<AxesSubplot:xlabel='MBASpecialization', ylabel='count'>


sns.barplot(x="MBASpecialization", y="Salary", hue="Gender", data=data)

<AxesSubplot:xlabel='MBASpecialization', ylabel='Salary'>


sns.boxplot(x = dataset['salary'],y= dataset['gender'],saturation=1)
mean_of_both=round(dataset['salary'].groupby(dataset['gender']).mean())
median_both=round(dataset['salary'].groupby(dataset['gender']).median())
print('The mean salary of Male is {} while Female is {}'.format(mean_of_both[1],mean_of_both[0]))
print('The median salary of Male is {} while Female is {}'.format(median_both[1],median_both[0]))
plt.show()

The mean salary of Male is 2796.0 while Female is 2195.0
The median salary of Male is 3250.0 while Female is 2808.0


sns.set()
_ = plt.hist(dataset['salary'], bins=25)
_ = plt.xlabel('Salary in $')
_ = plt.ylabel('Count')
_ = plt.title('Salary Histogram')
plt.show()


_ = sns.swarmplot(x='ssc_b', y='salary', data=dataset)
_ = plt.xlabel('Board of Secondary Education')
_ = plt.ylabel('Salary in $')
_ = plt.title('Swarmplot of Board of Secondary Education and Salary')
plt.show()

C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 11.2% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
  warnings.warn(msg, UserWarning)


_ = plt.hist(dataset['ssc_p'], bins=25)
_ = plt.xlabel('Secondary Education Percentage')
_ = plt.ylabel('Count')
_ = plt.title('Secondary Education Percentage Histogram')
plt.show()


sns.boxplot(x = dataset['status'], y = dataset['ssc_p'])

<AxesSubplot:xlabel='status', ylabel='ssc_p'>


_ = sns.swarmplot(x='hsc_b', y='salary', data=dataset)
_ = plt.xlabel('Board of Higher Secondary Education')
_ = plt.ylabel('Salary in $')
_ = plt.title('Swarmplot of Board of Higher Secondary Education and Salary')
plt.show()

C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 11.5% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
  warnings.warn(msg, UserWarning)


_ = sns.swarmplot(x='hsc_s', y='salary', data=dataset)
_ = plt.xlabel('Specialization in Higher Secondary Education')
_ = plt.ylabel('Salary in $')
_ = plt.title('Swarmplot of Board of Specialization in Hihger Secondary Education and Salary')
plt.show()

C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 19.5% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
  warnings.warn(msg, UserWarning)
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 16.5% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
  warnings.warn(msg, UserWarning)


_ = plt.hist(dataset['hsc_p'], bins=25)
_ = plt.xlabel('Higher Secondary Education Percentage')
_ = plt.ylabel('Count')
_ = plt.title('Higher Secondary Education Percentage Histogram')
plt.show()


sns.boxplot(x = dataset['status'], y = dataset['hsc_p'])

<AxesSubplot:xlabel='status', ylabel='hsc_p'>


_ = sns.swarmplot(x='degree_t', y='salary', data=dataset)
_ = plt.xlabel('Undergraduate Degree Field')
_ = plt.ylabel('salary in $')
_ = plt.title('Swarmplot of Undergraduate Degree Field and Salary')
plt.show()

C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 31.7% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
  warnings.warn(msg, UserWarning)


_ = plt.hist(dataset['degree_p'], bins=25)
_ = plt.xlabel('Undergraduate Degree Percentage')
_ = plt.ylabel('Count')
_ = plt.title('Undergraduate Degree Percentage Histogram')
plt.show()


sns.boxplot(x = dataset['status'], y = dataset['degree_p'])

<AxesSubplot:xlabel='status', ylabel='degree_p'>


_ = sns.swarmplot(x='workex', y='salary', data=dataset)
_ = plt.xlabel('Work Experience')
_ = plt.ylabel('Salary in $')
_ = plt.title('Swarmplot of Work Experience and Salary')
plt.show()

C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 22.7% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
  warnings.warn(msg, UserWarning)


_ = sns.swarmplot(x='specialisation', y='salary', data=dataset)
_ = plt.xlabel('MBA Specialization')
_ = plt.ylabel('Salary in $')
_ = plt.title('Swarmplot of MBA Specialization and Salary')
plt.show()

C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\categorical.py:1296: UserWarning: 17.9% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
  warnings.warn(msg, UserWarning)


_ = plt.hist(dataset['mba_p'], bins=25)
_ = plt.xlabel('MBA Percentage')
_ = plt.ylabel('Count')
_ = plt.title('MBA Percentage Histogram')
plt.show()


sns.boxplot(x = dataset['status'], y = dataset['mba_p'])

<AxesSubplot:xlabel='status', ylabel='mba_p'>


_ = plt.hist(dataset['etest_p'], bins=25)
_ = plt.xlabel('Employment Test Percentage')
_ = plt.ylabel('Count')
_ = plt.title('Employment Test Percentage Histogram')
plt.show()


sns.jointplot(x = dataset['ssc_p'], y = dataset['salary'], kind='hex')

<seaborn.axisgrid.JointGrid at 0x1aa9b18afa0>


sns.jointplot(x = dataset['hsc_p'], y = dataset['salary'], kind='hex')

<seaborn.axisgrid.JointGrid at 0x1aa9af1ac70>


sns.jointplot(x = dataset['degree_p'], y = dataset['salary'], kind='hex')

<seaborn.axisgrid.JointGrid at 0x1aa9b319b50>


sns.jointplot(x = dataset['mba_p'], y = dataset['salary'], kind='hex')

<seaborn.axisgrid.JointGrid at 0x1aa99b93c40>


sns.violinplot(x=dataset["degree_t"], y=dataset["salary"], data=dataset)
sns.stripplot(x=dataset["degree_t"], y=dataset["salary"], data=dataset,hue=dataset['status'])
plt.show()


plt.figure(figsize =(19,8))
sns.lineplot(dataset["degree_p"], dataset["salary"], hue=dataset["degree_t"], data=dataset)

plt.show()

C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  warnings.warn(


plot = sns.catplot(x="PlacementStatus", y="EmployabilityTestGrade%", data=data)
plt.xlabel("Placement Status")
plt.ylabel("Employability Test Grade (in %)")
plot.savefig("images/EmployabilityTest.png")


sns.regplot(x="EmployabilityTestGrade%", y="Salary", data=data)
plt.xlabel("Employability Test grade")

Text(0.5, 0, 'Employability Test grade')


sns.regplot(x="MBAGrade%", y="Salary", data=data)
plt.xlabel("MBA grades")

Text(0.5, 0, 'MBA grades')


sns.barplot(x="WorkExperience", y="Salary", data=data)

<AxesSubplot:xlabel='WorkExperience', ylabel='Salary'>


salary_with_experience = np.mean(data[data["WorkExperience"] == "Yes"]["Salary"])
salary_no_experience = np.mean(data[data["WorkExperience"] == "No"]["Salary"])
print("Salary with work experience: ", salary_with_experience)
print("Salary with no work experience: ", salary_no_experience)
print("Percentual difference: ", 100*(salary_with_experience / salary_no_experience - 1))

Salary with work experience:  303265.625
Salary with no work experience:  277523.8095238095
Percentual difference:  9.275534059711731


sns.countplot(x="PlacementStatus", hue="WorkExperience", data=data)
plt.xlabel("Placement status")

Text(0.5, 0, 'Placement status')


placement_with_experience = data[data["WorkExperience"] == "Yes"]["PlacementStatus"] == "Placed"
placement_no_experience = data[data["WorkExperience"] == "No"]["PlacementStatus"] == "Placed"

print("Placement with work experience: ", placement_with_experience.mean())
print("Placement with no work experience: ", placement_no_experience.mean())
print("Percentual difference: ", 100*(placement_with_experience.mean() / placement_no_experience.mean() - 1))

# print("Salary with work experience: ", salary_with_experience)
# print("Salary with no work experience: ", salary_no_experience)
# print("Percentual difference: ", 100*(salary_with_experience / salary_no_experience - 1))

Placement with work experience:  0.8648648648648649
Placement with no work experience:  0.5957446808510638
Percentual difference:  45.173745173745196


def label_encoding(data):
    le = LabelEncoder()
    le.fit(data)
    return le.transform(data)


def one_hot_encoding(data):
    return pd.get_dummies(data)


data["Gender"] = label_encoding(data["Gender"])
data["LowerSecondarySchollBoard"] = label_encoding(data["LowerSecondarySchollBoard"])
data["HigherSecondarySchollBoard"] = label_encoding(data["HigherSecondarySchollBoard"])
data["WorkExperience"] = label_encoding(data["WorkExperience"])
data["MBASpecialization"] = label_encoding(data["MBASpecialization"])
data["PlacementStatus"] = label_encoding(data["PlacementStatus"])


data = one_hot_encoding(data)


data.head(5)


salary_corr = data[data["PlacementStatus"] == 1].corr()["Salary"].sort_values()

# Display correlations
salary_corr # corr() returns NaN if all values are that same, which is the case with PlacementStatus

DegreeSpecialization_Comm&Mgmt                 -0.160322
MBASpecialization                              -0.146576
HigherSecondarySchollSpecialization_Arts       -0.088253
DegreeGrade%                                   -0.019272
DegreeSpecialization_Others                    -0.016573
HigherSecondarySchollSpecialization_Commerce   -0.014219
HigherSecondarySchollBoard                     -0.007549
LowerSecondarySchollBoard                       0.005539
LowerSecondarySchollGrade%                      0.035330
HigherSecondarySchollSpecialization_Science     0.049547
HigherSecondarySchollGrade%                     0.076819
WorkExperience                                  0.136920
Gender                                          0.158912
DegreeSpecialization_Sci&Tech                   0.172492
MBAGrade%                                       0.175013
EmployabilityTestGrade%                         0.178307
Salary                                          1.000000
PlacementStatus                                      NaN
Name: Salary, dtype: float64


plt.figure(figsize=(20,10))
sns.heatmap(data.corr().abs(), annot=True)

<AxesSubplot:>


placement_corr = data.corr()["PlacementStatus"].sort_values()

# Display correlations
placement_corr # placement_corr = data.corr()["PlacementStatus"].sort_values()

# Display correlations
placement_corr

MBASpecialization                              -0.250655
DegreeSpecialization_Others                    -0.117232
HigherSecondarySchollSpecialization_Arts       -0.071653
HigherSecondarySchollSpecialization_Science     0.007279
DegreeSpecialization_Sci&Tech                   0.008688
HigherSecondarySchollBoard                      0.016945
HigherSecondarySchollSpecialization_Commerce    0.024414
LowerSecondarySchollBoard                       0.037297
DegreeSpecialization_Comm&Mgmt                  0.046849
MBAGrade%                                       0.076922
Gender                                          0.090670
EmployabilityTestGrade%                         0.127639
WorkExperience                                  0.276060
DegreeGrade%                                    0.479861
HigherSecondarySchollGrade%                     0.491228
LowerSecondarySchollGrade%                      0.607889
PlacementStatus                                 1.000000
Salary                                               NaN
Name: PlacementStatus, dtype: float64


placement_corr = data.corr()["PlacementStatus"].sort_values()

# Display correlations
 # Here we are looking at PlacementStatus, so Salary is not important and we will ignore the NaN


columns_to_drop = [
    "Salary", 
    "HigherSecondarySchollSpecialization_Arts",
    "HigherSecondarySchollSpecialization_Science",
    "HigherSecondarySchollSpecialization_Commerce",
    "DegreeSpecialization_Sci&Tech",
    "DegreeSpecialization_Comm&Mgmt",
    "DegreeSpecialization_Others",
    "MBAGrade%"
]
placement_data = data.drop(columns_to_drop, axis=1)


y = placement_data["PlacementStatus"]
X = placement_data.drop(["PlacementStatus"], axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)


X_train = (X_train - X_train.mean()) / X_train.std()
X_test = (X_test - X_test.mean()) / X_test.std()


X_train.head(5)


X_test.head(5)


salary_corr = data[data["PlacementStatus"] == 1].corr()["Salary"].sort_values()

# Display correlations
salary_corr

DegreeSpecialization_Comm&Mgmt                 -0.160322
MBASpecialization                              -0.146576
HigherSecondarySchollSpecialization_Arts       -0.088253
DegreeGrade%                                   -0.019272
DegreeSpecialization_Others                    -0.016573
HigherSecondarySchollSpecialization_Commerce   -0.014219
HigherSecondarySchollBoard                     -0.007549
LowerSecondarySchollBoard                       0.005539
LowerSecondarySchollGrade%                      0.035330
HigherSecondarySchollSpecialization_Science     0.049547
HigherSecondarySchollGrade%                     0.076819
WorkExperience                                  0.136920
Gender                                          0.158912
DegreeSpecialization_Sci&Tech                   0.172492
MBAGrade%                                       0.175013
EmployabilityTestGrade%                         0.178307
Salary                                          1.000000
PlacementStatus                                      NaN
Name: Salary, dtype: float64


columns_to_keep = [
    "Salary",
    "EmployabilityTestGrade%",
    "MBAGrade%",
    "DegreeSpecialization_Sci&Tech",
    "DegreeSpecialization_Comm&Mgmt",
    "Gender",
    "WorkExperience",
    "MBASpecialization"
]
salary_data = data[columns_to_keep]


salary_data = salary_data[salary_data['Salary'].notna()]


y = salary_data["Salary"]
X = salary_data.drop(["Salary"], axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)


X_train = (X_train - X_train.mean()) / X_train.std()
X_test = (X_test - X_test.mean()) / X_test.std()


X_train.head(10)


y_train.head(10)

95     420000.0
27     265000.0
101    380000.0
177    650000.0
47     204000.0
44     200000.0
146    233000.0
56     240000.0
164    250000.0
80     240000.0
Name: Salary, dtype: float64


def trainRegression(X, Y):
    for degree in range(1, 4):
        name = 'Linear Regression degree' + str(degree)
        
        polynomial_features = PolynomialFeatures(degree=degree)
        X_poly = polynomial_features.fit_transform(X)
        
        model = LinearRegression()
        
        cv_results = cross_val_score(model, X_poly, Y, cv=10, scoring='r2')
        print('%s: %.6f (%.6f)' % (name, cv_results.mean(), cv_results.std()))


trainRegression(X_train, y_train)

Linear Regression degree1: -0.300547 (0.593319)
Linear Regression degree2: -56012702404937373576069120.000000 (167397531279467017331015680.000000)
Linear Regression degree3: -1273200841177254913376256.000000 (2553128309561786567229440.000000)


model = LinearRegression()
model.fit(X_train, y_train)
print(model.predict(X_test))
print(y_test.values)
print(r2_score(y_test, model.predict(X_test)))

[303555.88575642 302070.63156475 253695.20532424 306526.57108331
 291716.43907118 241686.34186756 239362.40691229 280510.03222008
 322901.97097687 279396.03079504 319113.87953033 296539.86383578
 322601.86041505 275781.6899594  272699.08542453]
[350000. 336000. 260000. 350000. 400000. 265000. 250000. 250000. 240000.
 200000. 411000. 250000. 400000. 300000. 260000.]
0.23452225966615792


columns_to_keep = {
    "Gender",
    "LowerSecondarySchollGrade%",
    "HigherSecondarySchollGrade%",
    "DegreeGrade%",
    "WorkExperience",
    "MBASpecialization", # This one is about the MBA but the institution would already know it beforehand
    "HigherSecondarySchollSpecialization_Arts",
    "HigherSecondarySchollSpecialization_Commerce",
    "HigherSecondarySchollSpecialization_Science",
    "DegreeSpecialization_Comm&Mgmt",
    "DegreeSpecialization_Others",
    "DegreeSpecialization_Sci&Tech",
    "PlacementStatus"
}

before_mba_data = data[columns_to_keep]


placement_corr = before_mba_data.corr()["PlacementStatus"].sort_values()

# Display correlations
placement_corr # Here we are looking at PlacementStatus, so Salary is not important and we will ignore the NaN

MBASpecialization                              -0.250655
DegreeSpecialization_Others                    -0.117232
HigherSecondarySchollSpecialization_Arts       -0.071653
HigherSecondarySchollSpecialization_Science     0.007279
DegreeSpecialization_Sci&Tech                   0.008688
HigherSecondarySchollSpecialization_Commerce    0.024414
DegreeSpecialization_Comm&Mgmt                  0.046849
Gender                                          0.090670
WorkExperience                                  0.276060
DegreeGrade%                                    0.479861
HigherSecondarySchollGrade%                     0.491228
LowerSecondarySchollGrade%                      0.607889
PlacementStatus                                 1.000000
Name: PlacementStatus, dtype: float64


columns_to_keep = {
    "Gender",
    "LowerSecondarySchollGrade%",
    "HigherSecondarySchollGrade%",
    "DegreeGrade%",
    "WorkExperience",
    "MBASpecialization", # This one is about the MBA but the institution would already know it beforehand
#     "HigherSecondarySchollSpecialization_Arts",
#     "HigherSecondarySchollSpecialization_Commerce",
#     "HigherSecondarySchollSpecialization_Science",
#     "DegreeSpecialization_Comm&Mgmt",
#     "DegreeSpecialization_Others",
#     "DegreeSpecialization_Sci&Tech",
    "PlacementStatus"
}

before_mba_data = data[columns_to_keep]


models = []
models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))
models.append(('KNN', KNeighborsClassifier()))
models.append(('TREE', DecisionTreeClassifier()))
models.append(('SVM', SVC(gamma='auto', probability=True)))
models.append(('GB', GradientBoostingClassifier()))
models.append(('RF', RandomForestClassifier()))


def runModels(X_train, Y_train, scoring='accuracy'):
    for name, model in models:
        kfold = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
        cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
        print('%s: %.6f (%.6f)' % (name, cv_results.mean(), cv_results.std()))


runModels(X_train, y_train, scoring='f1')

C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_split.py:676: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_split.py:676: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\base.py:441: UserWarning: X does not have valid feature names, but KNeighborsClassifier was fitted with feature names
  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\base.py:441: UserWarning: X does not have valid feature names, but KNeighborsClassifier was fitted with feature names
  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\base.py:441: UserWarning: X does not have valid feature names, but KNeighborsClassifier was fitted with feature names
  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\base.py:441: UserWarning: X does not have valid feature names, but KNeighborsClassifier was fitted with feature names
  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\base.py:441: UserWarning: X does not have valid feature names, but KNeighborsClassifier was fitted with feature names
  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_split.py:676: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_split.py:676: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(

LR: nan (nan)
KNN: nan (nan)
TREE: nan (nan)

C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py:771: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_validation.py", line 762, in _score
    scores = scorer(estimator, X_test, y_test)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1113, in f1_score
    return fbeta_score(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1251, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1534, in precision_recall_fscore_support
    labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
  File "C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\metrics\_classification.py", line 1355, in _check_set_wise_labels
    raise ValueError(
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

  warnings.warn(
C:\Users\PREETI\AppData\Local\Programs\Python\Python39\lib\site-packages\sklearn\model_selection\_split.py:676: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.
  warnings.warn(

SVM: nan (nan)


!jupyter nbconvert --to html Campus-placement-data-anal.ipynb

  File "C:\Users\PREETI\AppData\Local\Temp/ipykernel_15924/2096042202.py", line 1
    jupyter nbconvert --execute --to html notebook.ipynb
            ^
SyntaxError: invalid syntax

	SerialNumber	LowerSecondarySchollGrade%	HigherSecondarySchollGrade%	DegreeGrade%	EmployabilityTestGrade%	MBAGrade%	Salary
count	215.000000	215.000000	215.000000	215.000000	215.000000	215.000000	148.000000
mean	108.000000	67.303395	66.333163	66.370186	72.100558	62.278186	288655.405405
std	62.209324	10.827205	10.897509	7.358743	13.275956	5.833385	93457.452420
min	1.000000	40.890000	37.000000	50.000000	50.000000	51.210000	200000.000000
25%	54.500000	60.600000	60.900000	61.000000	60.000000	57.945000	240000.000000
50%	108.000000	67.000000	65.000000	66.000000	71.000000	62.000000	265000.000000
75%	161.500000	75.700000	73.000000	72.000000	83.500000	66.255000	300000.000000
max	215.000000	89.400000	97.700000	91.000000	98.000000	77.890000	940000.000000

	Gender	LowerSecondarySchollGrade%	LowerSecondarySchollBoard	HigherSecondarySchollGrade%	HigherSecondarySchollBoard	DegreeGrade%	WorkExperience	EmployabilityTestGrade%	MBASpecialization	MBAGrade%	PlacementStatus	Salary	HigherSecondarySchollSpecialization_Arts	HigherSecondarySchollSpecialization_Commerce	HigherSecondarySchollSpecialization_Science	DegreeSpecialization_Comm&Mgmt	DegreeSpecialization_Sci&Tech
0	1	67.00	1	91.00	1	58.00	0	55.0	1	58.80	1	270000.0	0	1	0	0	1
1	1	79.33	0	78.33	1	77.48	1	86.5	0	66.28	1	200000.0	0	0	1	0	1
2	1	65.00	0	68.00	0	64.00	0	75.0	0	57.80	1	250000.0	1	0	0	1	0
3	1	56.00	0	52.00	0	52.00	0	66.0	1	59.43	0	NaN	0	0	1	0	1
4	1	85.80	0	73.60	0	73.30	0	96.8	0	55.50	1	425000.0	0	1	0	1	0

	Gender	LowerSecondarySchollGrade%	LowerSecondarySchollBoard	HigherSecondarySchollGrade%	HigherSecondarySchollBoard	DegreeGrade%	WorkExperience	EmployabilityTestGrade%	MBASpecialization
79	-1.367791	0.162888	-0.913101	-0.379024	-1.237537	-0.043779	-0.710761	0.192690	1.100944
25	-1.367791	-1.372102	1.089495	-1.042572	-1.237537	-2.148106	1.399653	0.268148	-0.903605
104	0.727318	0.162888	-0.913101	-0.289355	0.803870	-0.176964	1.399653	-1.316460	1.100944
145	0.727318	2.069940	1.089495	-0.050837	0.803870	0.655444	-0.710761	-0.033682	1.100944
189	-1.367791	-1.239356	-0.913101	1.019806	0.803870	0.382414	-0.710761	1.750888	-0.903605

	Gender	LowerSecondarySchollGrade%	LowerSecondarySchollBoard	HigherSecondarySchollGrade%	HigherSecondarySchollBoard	DegreeGrade%	WorkExperience	EmployabilityTestGrade%	MBASpecialization
200	0.812920	0.106333	0.977008	-0.855848	0.738549	-0.288051	-0.812920	1.379209	-0.738549
212	0.812920	-0.057945	0.977008	-0.031218	0.738549	1.040835	1.174218	-0.751182	-0.738549
138	-1.174218	1.174140	0.977008	-0.384631	0.738549	1.040835	1.174218	2.009745	-0.738549
176	-1.174218	-0.715058	-0.977008	-0.855848	0.738549	-1.783049	-0.812920	-1.049661	1.292461
15	-1.174218	-0.222223	-0.977008	0.911216	-1.292461	0.376392	1.174218	0.218873	-0.738549

	EmployabilityTestGrade%	MBAGrade%	DegreeSpecialization_Sci&Tech	DegreeSpecialization_Comm&Mgmt	Gender	WorkExperience	MBASpecialization
95	1.605444	-0.038434	-0.606913	0.665057	0.700475	1.098872	-0.736480
27	-0.380194	-0.790559	-0.606913	0.665057	0.700475	-0.903182	1.347601
101	0.342908	-0.327842	-0.606913	0.665057	0.700475	-0.903182	1.347601
177	1.138320	1.417020	-0.606913	0.665057	-1.416869	1.098872	-0.736480
47	0.342908	-1.318897	-0.606913	0.665057	0.700475	1.098872	-0.736480
44	1.138320	1.230250	-0.606913	0.665057	-1.416869	1.098872	-0.736480
146	0.849079	-1.219623	-0.606913	0.665057	0.700475	-0.903182	1.347601
56	-0.380194	0.755756	-0.606913	0.665057	0.700475	-0.903182	-0.736480
164	-1.461954	0.526921	-0.606913	0.665057	-1.416869	-0.903182	-0.736480
80	-0.452504	-0.006465	-0.606913	0.665057	-1.416869	1.098872	1.347601

	SerialNumber	Gender	LowerSecondarySchollGrade%	LowerSecondarySchollBoard	HigherSecondarySchollGrade%	HigherSecondarySchollBoard	HigherSecondarySchollSpecialization	DegreeGrade%	DegreeSpecialization	WorkExperience	EmployabilityTestGrade%	MBASpecialization	MBAGrade%	PlacementStatus	Salary
0	1	M	67.00	Others	91.00	Others	Commerce	58.00	Sci&Tech	No	55.00	Mkt&HR	58.80	Placed	270000.0
1	2	M	79.33	Central	78.33	Others	Science	77.48	Sci&Tech	Yes	86.50	Mkt&Fin	66.28	Placed	200000.0
2	3	M	65.00	Central	68.00	Central	Arts	64.00	Comm&Mgmt	No	75.00	Mkt&Fin	57.80	Placed	250000.0
3	4	M	56.00	Central	52.00	Central	Science	52.00	Sci&Tech	No	66.00	Mkt&HR	59.43	Not Placed	NaN
4	5	M	85.80	Central	73.60	Central	Commerce	73.30	Comm&Mgmt	No	96.80	Mkt&Fin	55.50	Placed	425000.0
5	6	M	55.00	Others	49.80	Others	Science	67.25	Sci&Tech	Yes	55.00	Mkt&Fin	51.58	Not Placed	NaN
6	7	F	46.00	Others	49.20	Others	Commerce	79.00	Comm&Mgmt	No	74.28	Mkt&Fin	53.29	Not Placed	NaN
7	8	M	82.00	Central	64.00	Central	Science	66.00	Sci&Tech	Yes	67.00	Mkt&Fin	62.14	Placed	252000.0
8	9	M	73.00	Central	79.00	Central	Commerce	72.00	Comm&Mgmt	No	91.34	Mkt&Fin	61.29	Placed	231000.0
9	10	M	58.00	Central	70.00	Central	Commerce	61.00	Comm&Mgmt	No	54.00	Mkt&Fin	52.21	Not Placed	NaN

	gender	ssc_p	ssc_b	hsc_p	hsc_b	hsc_s	degree_p	degree_t	workex	etest_p	specialisation	mba_p	status	salary
sl_no
1	M	67.00	Others	91.00	Others	Commerce	58.00	Sci&Tech	No	55.00	Mkt&HR	58.80	Placed	3510.0
2	M	79.33	Central	78.33	Others	Science	77.48	Sci&Tech	Yes	86.50	Mkt&Fin	66.28	Placed	2600.0
3	M	65.00	Central	68.00	Central	Arts	64.00	Comm&Mgmt	No	75.00	Mkt&Fin	57.80	Placed	3250.0
4	M	56.00	Central	52.00	Central	Science	52.00	Sci&Tech	No	66.00	Mkt&HR	59.43	Not Placed	0.0
5	M	85.80	Central	73.60	Central	Commerce	73.30	Comm&Mgmt	No	96.80	Mkt&Fin	55.50	Placed	5525.0
6	M	55.00	Others	49.80	Others	Science	67.25	Sci&Tech	Yes	55.00	Mkt&Fin	51.58	Not Placed	0.0
7	F	46.00	Others	49.20	Others	Commerce	79.00	Comm&Mgmt	No	74.28	Mkt&Fin	53.29	Not Placed	0.0
8	M	82.00	Central	64.00	Central	Science	66.00	Sci&Tech	Yes	67.00	Mkt&Fin	62.14	Placed	3276.0
9	M	73.00	Central	79.00	Central	Commerce	72.00	Comm&Mgmt	No	91.34	Mkt&Fin	61.29	Placed	3003.0
10	M	58.00	Central	70.00	Central	Commerce	61.00	Comm&Mgmt	No	54.00	Mkt&Fin	52.21	Not Placed	0.0

Campus Placement Dataset Analysis¶

Dataset¶

Categorical features¶

Gender¶

Lower Secondary Education¶

Higher Secondary Education¶

Specialization¶

Degree Specialization¶

Work Experience¶

Placement Status¶

Exploratory Analysis¶

Let's analyse the distribution of the PlacementStatus column¶

Converting rupees to US¶

Gender based analysis¶

Swarmplot of Gender VS Salary¶

Lower Secondary Grade Based¶

Higher Secondary Grade Based¶

Undergraduation Degree Based¶

Specialization¶

Post Graduation Based¶

Specialisation¶

Mean¶

Observation¶

Placement status based analysis¶

Lower Secondary Education Based¶

Higher Secondary Education Based¶

Undergraduate Degree Based¶

Post Graduation Based¶

Employment Test Percentage¶

Observation¶

Salary vs Academic results¶

Does the grade in school affect your future life?¶

Lower Secondary Eduacation vs Salary¶

Higher Secondary Education vs Salary¶

Undergraduate vs Salary¶

Post Graduation vs Salary¶

Which stream students are getting more placed and which stream students are mostly not placed?¶

Observation¶

Does Percentage in College determine salary ?¶

Relation between EmployabilityTest and PlacementStatus¶

Salary Vs Employability Test¶

Is the MBAGrade important?¶

How about work experience?¶

Observation¶

Encoding¶

Correlations¶

Heat Map for checking correlation¶

Observations¶

Questions¶

What are the major factors that lead to the person being hired¶

What are the major factors that affect the salary?¶

Linear Regression¶

Observation¶

Can the institution predict if the person will be successfull before they are accepted?¶