import pandas as pd
import numpy as np
import re
df = pd.read_csv("allocations.csv")
df.head()
Slot | Roll | Department | Program | Company | Job code | |
---|---|---|---|---|---|---|
0 | 10.1 | 183059003 | Computer Science & Engineering | M.Tech. | Colortokens | 1 |
1 | 10.1 | 193100039 | Mechanical Engineering | M.Tech. | Impact Guru Technology Ventures Pvt. Ltd. | 1 |
2 | 10.1 | 195280009 | Applied Statistics and Informatics | M.Sc. | NeuralTechSoft | 1 |
3 | 10.1 | 193070021 | Electrical Engineering | M.Tech. | Ignitarium Technology Solutions Pvt Ltd | 1 |
4 | 10.1 | 193060032 | Earth Sciences | M.Tech. | Impact Guru Technology Ventures Pvt. Ltd. | 1 |
df.shape
(1139, 6)
df["Slot"].describe()
count 1139 unique 23 top . freq 314 Name: Slot, dtype: object
def convert(x):
x = x.replace(" (IDC)","00")
if x == ".":
x = "16.3"
return x
convert("3 (IDC).1")
'300.1'
df["Slot"] = df["Slot"].apply(convert)
df[["Day","Slot No."]] = df["Slot"].str.split(".",expand = True)
df.head(10)
Slot | Roll | Department | Program | Company | Job code | Day | Slot No. | |
---|---|---|---|---|---|---|---|---|
0 | 10.1 | 183059003 | Computer Science & Engineering | M.Tech. | Colortokens | 1 | 10 | 1 |
1 | 10.1 | 193100039 | Mechanical Engineering | M.Tech. | Impact Guru Technology Ventures Pvt. Ltd. | 1 | 10 | 1 |
2 | 10.1 | 195280009 | Applied Statistics and Informatics | M.Sc. | NeuralTechSoft | 1 | 10 | 1 |
3 | 10.1 | 193070021 | Electrical Engineering | M.Tech. | Ignitarium Technology Solutions Pvt Ltd | 1 | 10 | 1 |
4 | 10.1 | 193060032 | Earth Sciences | M.Tech. | Impact Guru Technology Ventures Pvt. Ltd. | 1 | 10 | 1 |
5 | 10.1 | 183310001 | Geoinformatics and Natural Resources Engineering | M.Tech. | Regent Climate Connect Knowledge Solutions Pri... | 1 | 10 | 1 |
6 | 10.1 | 193050057 | Computer Science & Engineering | M.Tech. | Colortokens | 1 | 10 | 1 |
7 | 10.1 | 170070003 | Electrical Engineering | B.Tech. | BeeHyv | 1 | 10 | 1 |
8 | 10.1 | 193170012 | Energy Science and Engineering | M.Tech. | Impact Guru Technology Ventures Pvt. Ltd. | 1 | 10 | 1 |
9 | 10.1 | 183109005 | Mechanical Engineering | M.Tech. | Applied Materials | 2 | 10 | 1 |
df.tail()
Slot | Roll | Department | Program | Company | Job code | Day | Slot No. | |
---|---|---|---|---|---|---|---|---|
1134 | 16.3 | 170050005 | Computer Science & Engineering | B.Tech. | Placement Office | 4 | 16 | 3 |
1135 | 16.3 | 170100028 | Mechanical Engineering | B.Tech. | Placement Office | 4 | 16 | 3 |
1136 | 16.3 | 193100059 | Mechanical Engineering | M.Tech. | BYJU'S - The Learning App | 1 | 16 | 3 |
1137 | 16.3 | 193100075 | Mechanical Engineering | M.Tech. | Solar Energy Corporation Of India Limited | 2 | 16 | 3 |
1138 | 16.3 | 16D070061 | Electrical Engineering | Dual Degree (B.Tech. + M.Tech.) | Accenture | 1 | 16 | 3 |
df.drop(columns = "Slot", inplace=True)
df["Day"] = pd.to_numeric(df["Day"])
df["Slot No."] = pd.to_numeric(df["Slot No."])
df.head()
Roll | Department | Program | Company | Job code | Day | Slot No. | |
---|---|---|---|---|---|---|---|
0 | 183059003 | Computer Science & Engineering | M.Tech. | Colortokens | 1 | 10 | 1 |
1 | 193100039 | Mechanical Engineering | M.Tech. | Impact Guru Technology Ventures Pvt. Ltd. | 1 | 10 | 1 |
2 | 195280009 | Applied Statistics and Informatics | M.Sc. | NeuralTechSoft | 1 | 10 | 1 |
3 | 193070021 | Electrical Engineering | M.Tech. | Ignitarium Technology Solutions Pvt Ltd | 1 | 10 | 1 |
4 | 193060032 | Earth Sciences | M.Tech. | Impact Guru Technology Ventures Pvt. Ltd. | 1 | 10 | 1 |
df.drop(columns = "Job code", inplace=True)
df.head()
Roll | Department | Program | Company | Day | Slot No. | |
---|---|---|---|---|---|---|
0 | 183059003 | Computer Science & Engineering | M.Tech. | Colortokens | 10 | 1 |
1 | 193100039 | Mechanical Engineering | M.Tech. | Impact Guru Technology Ventures Pvt. Ltd. | 10 | 1 |
2 | 195280009 | Applied Statistics and Informatics | M.Sc. | NeuralTechSoft | 10 | 1 |
3 | 193070021 | Electrical Engineering | M.Tech. | Ignitarium Technology Solutions Pvt Ltd | 10 | 1 |
4 | 193060032 | Earth Sciences | M.Tech. | Impact Guru Technology Ventures Pvt. Ltd. | 10 | 1 |
df_BTech = df[df.Program == "B.Tech."]
df_BTech.head()
Roll | Department | Program | Company | Day | Slot No. | |
---|---|---|---|---|---|---|
7 | 170070003 | Electrical Engineering | B.Tech. | BeeHyv | 10 | 1 |
16 | 160110047 | Metallurgical Engineering and Materials Science | B.Tech. | Agrifi | 10 | 1 |
17 | 170050085 | Computer Science & Engineering | B.Tech. | Agrifi | 10 | 1 |
19 | 170020120 | Chemical Engineering | B.Tech. | BeeHyv | 10 | 1 |
23 | 170100087 | Mechanical Engineering | B.Tech. | BeeHyv | 10 | 1 |
df_MTech = df[df.Program == "M.Tech."]
df_MTech.head()
Roll | Department | Program | Company | Day | Slot No. | |
---|---|---|---|---|---|---|
0 | 183059003 | Computer Science & Engineering | M.Tech. | Colortokens | 10 | 1 |
1 | 193100039 | Mechanical Engineering | M.Tech. | Impact Guru Technology Ventures Pvt. Ltd. | 10 | 1 |
3 | 193070021 | Electrical Engineering | M.Tech. | Ignitarium Technology Solutions Pvt Ltd | 10 | 1 |
4 | 193060032 | Earth Sciences | M.Tech. | Impact Guru Technology Ventures Pvt. Ltd. | 10 | 1 |
5 | 183310001 | Geoinformatics and Natural Resources Engineering | M.Tech. | Regent Climate Connect Knowledge Solutions Pri... | 10 | 1 |
df_Dual = df[df.Program.str.contains("Dual Degree")]
df_Dual.head()
Roll | Department | Program | Company | Day | Slot No. | |
---|---|---|---|---|---|---|
14 | 16D070026 | Electrical Engineering | Dual Degree (B.Tech. + M.Tech.) | Ignitarium Technology Solutions Pvt Ltd | 10 | 1 |
36 | 16D070027 | Electrical Engineering | Dual Degree (B.Tech. + M.Tech.) | Agrifi | 10 | 1 |
38 | 160110023 | Metallurgical Engineering and Materials Science | Dual Degree (B.Tech. + M.Tech.) | BeeHyv | 10 | 1 |
55 | 160110071 | Electrical Engineering | Dual Degree (B.Tech. + M.Tech.) | Boston Consulting Group | 1 | 1 |
56 | 16D170019 | Energy Science and Engineering, Energy Science... | Dual Degree (B.Tech. + M.Tech.) | Procter and Gamble | 1 | 1 |
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1139 entries, 0 to 1138 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Roll 1139 non-null object 1 Department 1139 non-null object 2 Program 1139 non-null object 3 Company 1139 non-null object 4 Day 1139 non-null int64 5 Slot No. 1139 non-null int64 dtypes: int64(2), object(4) memory usage: 53.5+ KB
slot_data = df.groupby("Slot No.",as_index=0)["Roll"].count()
slot_data = slot_data.reset_index(drop = 1)
slot_data.rename({"Roll":"No. of Students"},axis=1,inplace=True)
slot_data
# EXPORT AS SHEET
Slot No. | No. of Students | |
---|---|---|
0 | 1 | 609 |
1 | 2 | 216 |
2 | 3 | 314 |
Department_Stats_Breakdown = df.groupby(["Department","Program"], as_index = False)["Roll"].count().sort_values(["Roll","Program","Department"], ascending = [False,True,True])
Department_Stats_Breakdown.reset_index(drop = 1, inplace = True)
Department_Stats_Breakdown.head(15)
Department | Program | Roll | |
---|---|---|---|
0 | Computer Science & Engineering | B.Tech. | 108 |
1 | Electrical Engineering | M.Tech. | 101 |
2 | Computer Science & Engineering | M.Tech. | 88 |
3 | Mechanical Engineering | B.Tech. | 78 |
4 | Chemical Engineering | B.Tech. | 73 |
5 | Electrical Engineering | B.Tech. | 57 |
6 | Civil Engineering | B.Tech. | 56 |
7 | Electrical Engineering | Dual Degree (B.Tech. + M.Tech.) | 48 |
8 | Industrial Design Centre | M.Des. | 48 |
9 | Metallurgical Engineering and Materials Science | B.Tech. | 47 |
10 | Civil Engineering | M.Tech. | 36 |
11 | Mechanical Engineering | M.Tech. | 36 |
12 | Aerospace Engineering | B.Tech. | 28 |
13 | Metallurgical Engineering and Materials Science | M.Tech. | 24 |
14 | Mechanical Engineering | Dual Degree (B.Tech. + M.Tech.) | 23 |
Program_Stats = df.groupby("Program",as_index = 0)["Roll"].count().sort_values("Roll",ascending = False)
Program_Stats.reset_index(drop = True, inplace= True)
Program_Stats.rename({"Roll":"No. of Students"},axis=1,inplace=True)
Program_Stats
# EXPORT AS SHEET
Program | No. of Students | |
---|---|---|
0 | B.Tech. | 466 |
1 | M.Tech. | 398 |
2 | Dual Degree (B.Tech. + M.Tech.) | 132 |
3 | M.Des. | 48 |
4 | M.Sc. | 37 |
5 | B.S. | 20 |
6 | B.Des. | 12 |
7 | Ph.D. | 10 |
8 | Dual Degree (B.Des. + M. Des.) | 9 |
9 | Dual Degree (M.Tech. + Ph.D.) | 4 |
10 | M.P.P | 1 |
11 | M.Phil. | 1 |
12 | M.S. by research (Exit Degree) | 1 |
Program_Stats_Percentage = pd.DataFrame(round(df.value_counts("Program", normalize = True)*100,2))
Program_Stats_Percentage.reset_index(inplace = True)
Program_Stats_Percentage.rename({0:"% of Students"},axis=1,inplace=True)
Program_Stats_Percentage
# EXPORT AS SHEET
Program | % of Students | |
---|---|---|
0 | B.Tech. | 40.91 |
1 | M.Tech. | 34.94 |
2 | Dual Degree (B.Tech. + M.Tech.) | 11.59 |
3 | M.Des. | 4.21 |
4 | M.Sc. | 3.25 |
5 | B.S. | 1.76 |
6 | B.Des. | 1.05 |
7 | Ph.D. | 0.88 |
8 | Dual Degree (B.Des. + M. Des.) | 0.79 |
9 | Dual Degree (M.Tech. + Ph.D.) | 0.35 |
10 | M.P.P | 0.09 |
11 | M.Phil. | 0.09 |
12 | M.S. by research (Exit Degree) | 0.09 |
company_stats = df.groupby(["Company","Day","Slot No."],as_index=False)["Roll"].count().sort_values(["Roll","Company"], ascending = [0,1])
company_stats.reset_index(drop = True, inplace=True)
company_stats.shape
(285, 4)
company_stats[company_stats["Company"].str.contains("Hp",flags=re.I, regex=True)]
Company | Day | Slot No. | Roll |
---|
company_stats.rename({"Roll":"No. of Students"},axis=1,inplace=True)
company_stats.head(10)
# EXPORT AS SHEET
Company | Day | Slot No. | No. of Students | |
---|---|---|---|---|
0 | Placement Office | 16 | 3 | 147 |
1 | Oracle | 2 | 1 | 23 |
2 | TATA Projects | 3 | 1 | 16 |
3 | Texas Instruments | 1 | 1 | 15 |
4 | Axis Bank | 3 | 1 | 13 |
5 | Google India Pvt. Ltd. | 1 | 1 | 13 |
6 | ICICI Lombard GIC Ltd. | 5 | 1 | 12 |
7 | Micron Technologies Operation India LLP | 3 | 1 | 12 |
8 | Bridgei2i Analytics Solutions | 16 | 3 | 11 |
9 | ICICI Bank | 3 | 1 | 11 |
company_stats.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 285 entries, 0 to 284 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Company 285 non-null object 1 Day 285 non-null int64 2 Slot No. 285 non-null int64 3 No. of Students 285 non-null int64 dtypes: int64(3), object(1) memory usage: 9.0+ KB
company_stats["Company"].nunique()
258
company_stats["Company"].count()
285
repeated_companies = company_stats[company_stats.duplicated(subset="Company", keep=False)]
repeated_companies = repeated_companies.sort_values("Company")
repeated_companies.reset_index(drop=True,inplace=True)
repeated_companies.head()
# EXPORT AS SHEET
Company | Day | Slot No. | No. of Students | |
---|---|---|---|---|
0 | ANZ Bank | 3 | 1 | 4 |
1 | ANZ Bank | 1 | 1 | 2 |
2 | ANZ Bank | 3 | 2 | 5 |
3 | Applied Materials | 10 | 1 | 2 |
4 | Applied Materials | 3 | 1 | 2 |
repeated_companies_total = repeated_companies.groupby(["Company"],as_index=False)["No. of Students"].sum()
repeated_companies_total = repeated_companies_total.sort_values(["No. of Students","Company"],ascending=[0,1])
repeated_companies_total.reset_index(drop=True,inplace=True)
repeated_companies_total
# EXPORT AS SHEET
Company | No. of Students | |
---|---|---|
0 | Tata Consultancy Services | 26 |
1 | Axis Bank | 20 |
2 | Intel | 12 |
3 | Microsoft India Pvt. Ltd. | 12 |
4 | ANZ Bank | 11 |
5 | Qualcomm | 11 |
6 | Enphase Energy | 10 |
7 | HCL Technologies Ltd. | 10 |
8 | IDFC First Bank | 10 |
9 | Jaguar Land Rover India Limited | 10 |
10 | Bajaj Auto Ltd | 8 |
11 | LnT Infotech | 8 |
12 | Samsung Research Institute, Bangalore | 8 |
13 | pharmaACE- Analytics Centre of Excellence | 8 |
14 | Flipkart | 6 |
15 | Infosys | 5 |
16 | Navi Technologies Pvt Ltd | 5 |
17 | Applied Materials | 4 |
18 | HiLabs Inc. | 4 |
19 | KLA Tencor | 4 |
20 | Procter and Gamble | 4 |
21 | Sprinklr | 4 |
22 | Quantsapp Private limited | 3 |
top_companies = company_stats.groupby("Company",as_index=False)["No. of Students"].sum()
top_companies = top_companies.sort_values(["No. of Students","Company"],ascending=[0,1])
top_companies.reset_index(drop=True,inplace=True)
top_companies.head(20)
# EXPORT AS SHEET
Company | No. of Students | |
---|---|---|
0 | Placement Office | 147 |
1 | Tata Consultancy Services | 26 |
2 | Oracle | 23 |
3 | Axis Bank | 20 |
4 | TATA Projects | 16 |
5 | Texas Instruments | 15 |
6 | Google India Pvt. Ltd. | 13 |
7 | ICICI Lombard GIC Ltd. | 12 |
8 | Intel | 12 |
9 | Micron Technologies Operation India LLP | 12 |
10 | Microsoft India Pvt. Ltd. | 12 |
11 | ANZ Bank | 11 |
12 | Bridgei2i Analytics Solutions | 11 |
13 | ICICI Bank | 11 |
14 | Qualcomm | 11 |
15 | Accenture | 10 |
16 | Enphase Energy | 10 |
17 | ExxonMobil Lubricants Private Limited | 10 |
18 | HCL Technologies Ltd. | 10 |
19 | IDFC First Bank | 10 |
def dept_splitter(x):
splitted = x.split(", ")
if len(splitted)==1:
return x
elif splitted[0]==splitted[1]:
return splitted[0]
else:
return x
top_branches = Department_Stats_Breakdown.copy()
top_branches["Department"] = top_branches["Department"].apply(dept_splitter)
top_branches = top_branches.rename({"Roll":"No. of Students"},axis=1)
top_branches.head(20)
# EXPORT AS SHEET
Department | Program | No. of Students | |
---|---|---|---|
0 | Computer Science & Engineering | B.Tech. | 108 |
1 | Electrical Engineering | M.Tech. | 101 |
2 | Computer Science & Engineering | M.Tech. | 88 |
3 | Mechanical Engineering | B.Tech. | 78 |
4 | Chemical Engineering | B.Tech. | 73 |
5 | Electrical Engineering | B.Tech. | 57 |
6 | Civil Engineering | B.Tech. | 56 |
7 | Electrical Engineering | Dual Degree (B.Tech. + M.Tech.) | 48 |
8 | Industrial Design Centre | M.Des. | 48 |
9 | Metallurgical Engineering and Materials Science | B.Tech. | 47 |
10 | Civil Engineering | M.Tech. | 36 |
11 | Mechanical Engineering | M.Tech. | 36 |
12 | Aerospace Engineering | B.Tech. | 28 |
13 | Metallurgical Engineering and Materials Science | M.Tech. | 24 |
14 | Mechanical Engineering | Dual Degree (B.Tech. + M.Tech.) | 23 |
15 | Applied Statistics and Informatics | M.Sc. | 23 |
16 | Geoinformatics and Natural Resources Engineering | M.Tech. | 23 |
17 | Aerospace Engineering | M.Tech. | 20 |
18 | Engineering Physics | B.Tech. | 19 |
19 | Energy Science and Engineering | Dual Degree (B.Tech. + M.Tech.) | 16 |
top_branches_all = top_branches.groupby("Department",as_index=False)["No. of Students"].sum()
top_branches_all = top_branches_all.sort_values(["No. of Students","Department"],ascending=[0,1])
top_branches_all.reset_index(drop = True, inplace = True)
top_branches_all.head(10)
# EXPORT AS SHEET
Department | No. of Students | |
---|---|---|
0 | Electrical Engineering | 219 |
1 | Computer Science & Engineering | 198 |
2 | Mechanical Engineering | 144 |
3 | Civil Engineering | 97 |
4 | Chemical Engineering | 89 |
5 | Metallurgical Engineering and Materials Science | 86 |
6 | Industrial Design Centre | 69 |
7 | Aerospace Engineering | 54 |
8 | Energy Science and Engineering | 30 |
9 | Engineering Physics | 24 |
top_branches_BTech = top_branches.loc[top_branches.Program == "B.Tech.",["Department","No. of Students"]]
top_branches_BTech.reset_index(drop = True, inplace=True)
top_branches_BTech
# EXPORT AS SHEET
Department | No. of Students | |
---|---|---|
0 | Computer Science & Engineering | 108 |
1 | Mechanical Engineering | 78 |
2 | Chemical Engineering | 73 |
3 | Electrical Engineering | 57 |
4 | Civil Engineering | 56 |
5 | Metallurgical Engineering and Materials Science | 47 |
6 | Aerospace Engineering | 28 |
7 | Engineering Physics | 19 |
top_branches_MTech = top_branches.loc[top_branches.Program == "M.Tech.",["Department","No. of Students"]]
top_branches_MTech.reset_index(drop = True, inplace=True)
top_branches_MTech
# EXPORT AS SHEET
Department | No. of Students | |
---|---|---|
0 | Electrical Engineering | 101 |
1 | Computer Science & Engineering | 88 |
2 | Civil Engineering | 36 |
3 | Mechanical Engineering | 36 |
4 | Metallurgical Engineering and Materials Science | 24 |
5 | Geoinformatics and Natural Resources Engineering | 23 |
6 | Aerospace Engineering | 20 |
7 | Chemical Engineering | 15 |
8 | Industrial Engineering & Operations Research | 15 |
9 | Energy Science and Engineering | 14 |
10 | Technology and Development | 8 |
11 | Environmental Science & Engineering | 6 |
12 | Earth Sciences | 5 |
13 | Systems & Control Engineering | 5 |
14 | Biosciences and Bioengineering | 2 |
top_branches_other = top_branches.loc[~top_branches.Program.isin(["M.Tech.","B.Tech."])]
top_branches_other.reset_index(drop=True,inplace=True)
top_branches_other.head()
# EXPORT AS SHEET
Department | Program | No. of Students | |
---|---|---|---|
0 | Electrical Engineering | Dual Degree (B.Tech. + M.Tech.) | 48 |
1 | Industrial Design Centre | M.Des. | 48 |
2 | Mechanical Engineering | Dual Degree (B.Tech. + M.Tech.) | 23 |
3 | Applied Statistics and Informatics | M.Sc. | 23 |
4 | Energy Science and Engineering | Dual Degree (B.Tech. + M.Tech.) | 16 |
# %%script echo "Skipping execution of this cell."
# Please uncomment above line ( only remove '# ' ) to stop executing below codes and creating the excel file again and again each time you run all cells.
with pd.ExcelWriter("Allocation_Analysis.xlsx") as w:
df_BTech.to_excel(w,sheet_name="B.Tech Details",index=False,freeze_panes=(1,0))
df_MTech.to_excel(w,sheet_name="M.Tech Details",index=False,freeze_panes=(1,0))
df_Dual.to_excel(w,sheet_name="Dual Degree Details",index=False,freeze_panes=(1,0))
slot_data.to_excel(w,sheet_name="Slot Analysis",index=False,freeze_panes=(1,0))
Program_Stats.to_excel(w,sheet_name="Program_Stats",index=False,freeze_panes=(1,0))
Program_Stats_Percentage.to_excel(w,sheet_name="Program_Stats_Percentage",index=False,freeze_panes=(1,0))
company_stats.to_excel(w,sheet_name="Company_Stats",index=False,freeze_panes=(1,0))
repeated_companies.to_excel(w,sheet_name="Repeating Companies",index=False,freeze_panes=(1,0))
repeated_companies_total.to_excel(w,sheet_name="Best repeating Companies",index=False,freeze_panes=(1,0))
top_companies.to_excel(w,sheet_name="Top Recruiters",index=False,freeze_panes=(1,0))
top_branches.to_excel(w,sheet_name="Best Program & Department",index=False,freeze_panes=(1,0))
top_branches_all.to_excel(w,sheet_name="Overall Best Department",index=False,freeze_panes=(1,0))
top_branches_BTech.to_excel(w,sheet_name="Best B.Tech Department",index=False,freeze_panes=(1,0))
top_branches_MTech.to_excel(w,sheet_name="Best M.Tech Department",index=False,freeze_panes=(1,0))
top_branches_other.to_excel(w,sheet_name="Best_Dept_other_Programs",index=False,freeze_panes=(1,0))