import pandas as pd
import numpy as np
import re


df = pd.read_csv("allocations.csv")
df.head()


df.shape

(1139, 6)


df["Slot"].describe()

count     1139
unique      23
top          .
freq       314
Name: Slot, dtype: object


def convert(x):
    x = x.replace(" (IDC)","00")
    if x == ".":
        x = "16.3"
    return x


convert("3 (IDC).1")

'300.1'


df["Slot"] = df["Slot"].apply(convert)


df[["Day","Slot No."]] = df["Slot"].str.split(".",expand = True)
df.head(10)


df.tail()


df.drop(columns = "Slot", inplace=True)


df["Day"] = pd.to_numeric(df["Day"])


df["Slot No."] = pd.to_numeric(df["Slot No."])


df.head()


df.drop(columns = "Job code", inplace=True)


df.head()


df_BTech = df[df.Program == "B.Tech."]
df_BTech.head()


df_MTech = df[df.Program == "M.Tech."]
df_MTech.head()


df_Dual = df[df.Program.str.contains("Dual Degree")]
df_Dual.head()


df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1139 entries, 0 to 1138
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Roll        1139 non-null   object
 1   Department  1139 non-null   object
 2   Program     1139 non-null   object
 3   Company     1139 non-null   object
 4   Day         1139 non-null   int64 
 5   Slot No.    1139 non-null   int64 
dtypes: int64(2), object(4)
memory usage: 53.5+ KB


slot_data = df.groupby("Slot No.",as_index=0)["Roll"].count()
slot_data = slot_data.reset_index(drop = 1)
slot_data.rename({"Roll":"No. of Students"},axis=1,inplace=True)
slot_data
# EXPORT AS SHEET


Department_Stats_Breakdown = df.groupby(["Department","Program"], as_index = False)["Roll"].count().sort_values(["Roll","Program","Department"], ascending = [False,True,True])
Department_Stats_Breakdown.reset_index(drop = 1, inplace = True)
Department_Stats_Breakdown.head(15)


Program_Stats = df.groupby("Program",as_index = 0)["Roll"].count().sort_values("Roll",ascending = False)
Program_Stats.reset_index(drop = True, inplace= True)
Program_Stats.rename({"Roll":"No. of Students"},axis=1,inplace=True)
Program_Stats
# EXPORT AS SHEET


Program_Stats_Percentage = pd.DataFrame(round(df.value_counts("Program", normalize = True)*100,2))


Program_Stats_Percentage.reset_index(inplace = True)
Program_Stats_Percentage.rename({0:"% of Students"},axis=1,inplace=True)
Program_Stats_Percentage
# EXPORT AS SHEET


company_stats = df.groupby(["Company","Day","Slot No."],as_index=False)["Roll"].count().sort_values(["Roll","Company"], ascending = [0,1])
company_stats.reset_index(drop = True, inplace=True)
company_stats.shape

(285, 4)


company_stats[company_stats["Company"].str.contains("Hp",flags=re.I, regex=True)]


company_stats.rename({"Roll":"No. of Students"},axis=1,inplace=True)
company_stats.head(10)
# EXPORT AS SHEET


company_stats.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 285 entries, 0 to 284
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Company          285 non-null    object
 1   Day              285 non-null    int64 
 2   Slot No.         285 non-null    int64 
 3   No. of Students  285 non-null    int64 
dtypes: int64(3), object(1)
memory usage: 9.0+ KB


company_stats["Company"].nunique()

258


company_stats["Company"].count()

285


repeated_companies = company_stats[company_stats.duplicated(subset="Company", keep=False)]
repeated_companies = repeated_companies.sort_values("Company")
repeated_companies.reset_index(drop=True,inplace=True)
repeated_companies.head()
# EXPORT AS SHEET


repeated_companies_total = repeated_companies.groupby(["Company"],as_index=False)["No. of Students"].sum()
repeated_companies_total = repeated_companies_total.sort_values(["No. of Students","Company"],ascending=[0,1])
repeated_companies_total.reset_index(drop=True,inplace=True)
repeated_companies_total
# EXPORT AS SHEET


top_companies = company_stats.groupby("Company",as_index=False)["No. of Students"].sum()
top_companies = top_companies.sort_values(["No. of Students","Company"],ascending=[0,1])
top_companies.reset_index(drop=True,inplace=True)
top_companies.head(20)
# EXPORT AS SHEET


def dept_splitter(x):
    splitted = x.split(", ")
    if len(splitted)==1:
        return x
    elif splitted[0]==splitted[1]:
        return splitted[0]
    else:
        return x


top_branches = Department_Stats_Breakdown.copy()
top_branches["Department"] = top_branches["Department"].apply(dept_splitter)
top_branches = top_branches.rename({"Roll":"No. of Students"},axis=1)
top_branches.head(20)
# EXPORT AS SHEET


top_branches_all = top_branches.groupby("Department",as_index=False)["No. of Students"].sum()
top_branches_all = top_branches_all.sort_values(["No. of Students","Department"],ascending=[0,1])
top_branches_all.reset_index(drop = True, inplace = True)
top_branches_all.head(10)
# EXPORT AS SHEET


top_branches_BTech = top_branches.loc[top_branches.Program == "B.Tech.",["Department","No. of Students"]]
top_branches_BTech.reset_index(drop = True, inplace=True)
top_branches_BTech
# EXPORT AS SHEET


top_branches_MTech = top_branches.loc[top_branches.Program == "M.Tech.",["Department","No. of Students"]]
top_branches_MTech.reset_index(drop = True, inplace=True)
top_branches_MTech
# EXPORT AS SHEET


top_branches_other = top_branches.loc[~top_branches.Program.isin(["M.Tech.","B.Tech."])]
top_branches_other.reset_index(drop=True,inplace=True)
top_branches_other.head()
# EXPORT AS SHEET


# %%script echo "Skipping execution of this cell."
# Please uncomment above line ( only remove '# ' ) to stop executing below codes and creating the excel file again and again each time you run all cells. 

with pd.ExcelWriter("Allocation_Analysis.xlsx") as w:
    df_BTech.to_excel(w,sheet_name="B.Tech Details",index=False,freeze_panes=(1,0))
    df_MTech.to_excel(w,sheet_name="M.Tech Details",index=False,freeze_panes=(1,0))
    df_Dual.to_excel(w,sheet_name="Dual Degree Details",index=False,freeze_panes=(1,0))
    slot_data.to_excel(w,sheet_name="Slot Analysis",index=False,freeze_panes=(1,0))
    Program_Stats.to_excel(w,sheet_name="Program_Stats",index=False,freeze_panes=(1,0))
    Program_Stats_Percentage.to_excel(w,sheet_name="Program_Stats_Percentage",index=False,freeze_panes=(1,0))
    company_stats.to_excel(w,sheet_name="Company_Stats",index=False,freeze_panes=(1,0))
    repeated_companies.to_excel(w,sheet_name="Repeating Companies",index=False,freeze_panes=(1,0))
    repeated_companies_total.to_excel(w,sheet_name="Best repeating Companies",index=False,freeze_panes=(1,0))
    top_companies.to_excel(w,sheet_name="Top Recruiters",index=False,freeze_panes=(1,0))
    top_branches.to_excel(w,sheet_name="Best Program & Department",index=False,freeze_panes=(1,0))
    top_branches_all.to_excel(w,sheet_name="Overall Best Department",index=False,freeze_panes=(1,0))
    top_branches_BTech.to_excel(w,sheet_name="Best B.Tech Department",index=False,freeze_panes=(1,0))
    top_branches_MTech.to_excel(w,sheet_name="Best M.Tech Department",index=False,freeze_panes=(1,0))
    top_branches_other.to_excel(w,sheet_name="Best_Dept_other_Programs",index=False,freeze_panes=(1,0))

	Slot	Roll	Department	Program	Company	Job code
0	10.1	183059003	Computer Science & Engineering	M.Tech.	Colortokens	1
1	10.1	193100039	Mechanical Engineering	M.Tech.	Impact Guru Technology Ventures Pvt. Ltd.	1
2	10.1	195280009	Applied Statistics and Informatics	M.Sc.	NeuralTechSoft	1
3	10.1	193070021	Electrical Engineering	M.Tech.	Ignitarium Technology Solutions Pvt Ltd	1
4	10.1	193060032	Earth Sciences	M.Tech.	Impact Guru Technology Ventures Pvt. Ltd.	1

	Slot	Roll	Department	Program	Company	Job code	Day	Slot No.
0	10.1	183059003	Computer Science & Engineering	M.Tech.	Colortokens	1	10	1
1	10.1	193100039	Mechanical Engineering	M.Tech.	Impact Guru Technology Ventures Pvt. Ltd.	1	10	1
2	10.1	195280009	Applied Statistics and Informatics	M.Sc.	NeuralTechSoft	1	10	1
3	10.1	193070021	Electrical Engineering	M.Tech.	Ignitarium Technology Solutions Pvt Ltd	1	10	1
4	10.1	193060032	Earth Sciences	M.Tech.	Impact Guru Technology Ventures Pvt. Ltd.	1	10	1
5	10.1	183310001	Geoinformatics and Natural Resources Engineering	M.Tech.	Regent Climate Connect Knowledge Solutions Pri...	1	10	1
6	10.1	193050057	Computer Science & Engineering	M.Tech.	Colortokens	1	10	1
7	10.1	170070003	Electrical Engineering	B.Tech.	BeeHyv	1	10	1
8	10.1	193170012	Energy Science and Engineering	M.Tech.	Impact Guru Technology Ventures Pvt. Ltd.	1	10	1
9	10.1	183109005	Mechanical Engineering	M.Tech.	Applied Materials	2	10	1

	Roll	Department	Program	Company	Day	Slot No.
0	183059003	Computer Science & Engineering	M.Tech.	Colortokens	10	1
1	193100039	Mechanical Engineering	M.Tech.	Impact Guru Technology Ventures Pvt. Ltd.	10	1
2	195280009	Applied Statistics and Informatics	M.Sc.	NeuralTechSoft	10	1
3	193070021	Electrical Engineering	M.Tech.	Ignitarium Technology Solutions Pvt Ltd	10	1
4	193060032	Earth Sciences	M.Tech.	Impact Guru Technology Ventures Pvt. Ltd.	10	1

	Roll	Department	Program	Company	Day	Slot No.
14	16D070026	Electrical Engineering	Dual Degree (B.Tech. + M.Tech.)	Ignitarium Technology Solutions Pvt Ltd	10	1
36	16D070027	Electrical Engineering	Dual Degree (B.Tech. + M.Tech.)	Agrifi	10	1
38	160110023	Metallurgical Engineering and Materials Science	Dual Degree (B.Tech. + M.Tech.)	BeeHyv	10	1
55	160110071	Electrical Engineering	Dual Degree (B.Tech. + M.Tech.)	Boston Consulting Group	1	1
56	16D170019	Energy Science and Engineering, Energy Science...	Dual Degree (B.Tech. + M.Tech.)	Procter and Gamble	1	1

	Department	Program	Roll
0	Computer Science & Engineering	B.Tech.	108
1	Electrical Engineering	M.Tech.	101
2	Computer Science & Engineering	M.Tech.	88
3	Mechanical Engineering	B.Tech.	78
4	Chemical Engineering	B.Tech.	73
5	Electrical Engineering	B.Tech.	57
6	Civil Engineering	B.Tech.	56
7	Electrical Engineering	Dual Degree (B.Tech. + M.Tech.)	48
8	Industrial Design Centre	M.Des.	48
9	Metallurgical Engineering and Materials Science	B.Tech.	47
10	Civil Engineering	M.Tech.	36
11	Mechanical Engineering	M.Tech.	36
12	Aerospace Engineering	B.Tech.	28
13	Metallurgical Engineering and Materials Science	M.Tech.	24
14	Mechanical Engineering	Dual Degree (B.Tech. + M.Tech.)	23

IIT - Bombay Placement Analysis¶

Dataset¶

Dataset Shape¶

Description¶

Data Cleaning and Transformation of data¶

¶

Final inspection of data after the tranformation¶

¶

Data Analysis¶

Slot wise analysis¶

Department and Program wise analysis¶

Program wise analysis¶

1. Number count.¶

2. Percentage count.¶

Company, Day and Slot wise mixed analysis¶

Finding all the companies who recruited more than once¶

Top recruiter Companies for the entire season¶

Top branches¶

Irrespective of Programs¶

Program wise¶

Exporting DataFrames into multiple Excel sheets of a single file¶

	Slot	Roll	Department	Program	Company	Job code	Day	Slot No.
1134	16.3	170050005	Computer Science & Engineering	B.Tech.	Placement Office	4	16	3
1135	16.3	170100028	Mechanical Engineering	B.Tech.	Placement Office	4	16	3
1136	16.3	193100059	Mechanical Engineering	M.Tech.	BYJU'S - The Learning App	1	16	3
1137	16.3	193100075	Mechanical Engineering	M.Tech.	Solar Energy Corporation Of India Limited	2	16	3
1138	16.3	16D070061	Electrical Engineering	Dual Degree (B.Tech. + M.Tech.)	Accenture	1	16	3

	Program	% of Students
0	B.Tech.	40.91
1	M.Tech.	34.94
2	Dual Degree (B.Tech. + M.Tech.)	11.59
3	M.Des.	4.21
4	M.Sc.	3.25
5	B.S.	1.76
6	B.Des.	1.05
7	Ph.D.	0.88
8	Dual Degree (B.Des. + M. Des.)	0.79
9	Dual Degree (M.Tech. + Ph.D.)	0.35
10	M.P.P	0.09
11	M.Phil.	0.09
12	M.S. by research (Exit Degree)	0.09

	Company	Day	Slot No.	No. of Students
0	Placement Office	16	3	147
1	Oracle	2	1	23
2	TATA Projects	3	1	16
3	Texas Instruments	1	1	15
4	Axis Bank	3	1	13
5	Google India Pvt. Ltd.	1	1	13
6	ICICI Lombard GIC Ltd.	5	1	12
7	Micron Technologies Operation India LLP	3	1	12
8	Bridgei2i Analytics Solutions	16	3	11
9	ICICI Bank	3	1	11

	Company	Day	Slot No.	No. of Students
0	ANZ Bank	3	1	4
1	ANZ Bank	1	1	2
2	ANZ Bank	3	2	5
3	Applied Materials	10	1	2
4	Applied Materials	3	1	2

	Company	No. of Students
0	Tata Consultancy Services	26
1	Axis Bank	20
2	Intel	12
3	Microsoft India Pvt. Ltd.	12
4	ANZ Bank	11
5	Qualcomm	11
6	Enphase Energy	10
7	HCL Technologies Ltd.	10
8	IDFC First Bank	10
9	Jaguar Land Rover India Limited	10
10	Bajaj Auto Ltd	8
11	LnT Infotech	8
12	Samsung Research Institute, Bangalore	8
13	pharmaACE- Analytics Centre of Excellence	8
14	Flipkart	6
15	Infosys	5
16	Navi Technologies Pvt Ltd	5
17	Applied Materials	4
18	HiLabs Inc.	4
19	KLA Tencor	4
20	Procter and Gamble	4
21	Sprinklr	4
22	Quantsapp Private limited	3