# import libraries
import pandas as pd
import matplotlib.pyplot as plt
# source data: percentage of bachelor's degrees awarded to women in the USA, per field and year
DATA_URL = "http://www.randalolson.com/wp-content/uploads/percent-bachelors-degrees-women-usa.csv"
# load the CSV into a data frame
women_degrees_df = pd.read_csv(DATA_URL)
# summarise the columns: names, non-null counts, dtypes and memory usage
women_degrees_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42 entries, 0 to 41
Data columns (total 18 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Year 42 non-null int64
1 Agriculture 42 non-null float64
2 Architecture 42 non-null float64
3 Art and Performance 42 non-null float64
4 Biology 42 non-null float64
5 Business 42 non-null float64
6 Communications and Journalism 42 non-null float64
7 Computer Science 42 non-null float64
8 Education 42 non-null float64
9 Engineering 42 non-null float64
10 English 42 non-null float64
11 Foreign Languages 42 non-null float64
12 Health Professions 42 non-null float64
13 Math and Statistics 42 non-null float64
14 Physical Sciences 42 non-null float64
15 Psychology 42 non-null float64
16 Public Administration 42 non-null float64
17 Social Sciences and History 42 non-null float64
dtypes: float64(17), int64(1)
memory usage: 6.0 KB
# summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
# NOTE: the result is not printed explicitly; it is only shown by an interactive front end such as a notebook
women_degrees_df.describe()
# preview the first rows of the data frame (head() returns five rows by default)
women_degrees_df.head()
### shared styling that we'll re-use throughout the analysis
def _rgb(red, green, blue):
    """Scale 0-255 RGB channel values down to the 0-1 range matplotlib requires."""
    return (red/255, green/255, blue/255)


# line colour for the men's series
PRIMARY_COLOR = _rgb(95, 158, 209)
# line colour for the women's series
SECONDARY_COLOR = _rgb(255, 128, 14)
# muted grey for supporting elements such as reference lines
SUPPORT_COLOR = _rgb(171, 171, 171)
# one line width shared by every plotted series
LINE_WIDTH = 2
# function that adds one subplot per degree to a figure
def add_degrees_subplots(fig, degrees, rows, columns, annotations, ax_callback=None):
    """Add a men-vs-women trend chart to ``fig`` for every degree in ``degrees``.

    Charts are placed in a ``rows`` x ``columns`` grid, in the order the degrees
    are given. The data is read from the module-level ``women_degrees_df``; the
    men's share is derived as ``100 - women's share``.

    Args:
        fig: figure the subplots are added to (via ``fig.add_subplot``).
        degrees: column names of ``women_degrees_df`` to plot, one chart each.
        rows: number of rows in the subplot grid.
        columns: number of columns in the subplot grid.
        annotations: sequence of dicts with "x", "y" and "label" keys.
            Entries 0 and 1 are drawn on the first chart; entries 2 and 3 are
            drawn on the last chart (only when it is not also the first chart).
        ax_callback: optional callable invoked as ``ax_callback(ax, degree)``
            once each chart has been built, for per-axis customisation.
    """
    years = women_degrees_df["Year"]
    last_index = len(degrees) - 1
    for index, degree in enumerate(degrees):
        # subplot positions are 1-based
        ax = fig.add_subplot(rows, columns, index + 1)
        # plot the trends in gender involvement over time
        women_share = women_degrees_df[degree]
        ax.plot(years, women_share, c=SECONDARY_COLOR, label="Women", linewidth=LINE_WIDTH)
        ax.plot(years, 100 - women_share, c=PRIMARY_COLOR, label="Men", linewidth=LINE_WIDTH)
        # remove the tick marks on all four sides
        ax.tick_params(bottom=False, top=False, left=False, right=False)
        # remove the borders around the chart
        for spine in ax.spines.values():
            spine.set_visible(False)
        # restrict the range of x values and y values
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        # set the title
        ax.set_title(degree)
        # add custom annotations for the first chart and the last
        if index == 0:
            chart_labels = (annotations[0], annotations[1])
        elif index == last_index:
            chart_labels = (annotations[2], annotations[3])
        else:
            chart_labels = ()
        for note in chart_labels:
            ax.text(note["x"], note["y"], note["label"])
        # hand the finished axis to the callback for further customisation
        if ax_callback is not None:
            ax_callback(ax, degree)
# the STEM degrees, in the order their charts appear
stem_degrees = [
    'Engineering',
    'Computer Science',
    'Psychology',
    'Biology',
    'Physical Sciences',
    'Math and Statistics',
]
# text labels: the first pair is drawn on the first chart, the second pair on the last chart
stem_annotations = [
    dict(x=2005, y=87, label="Men"),
    dict(x=2002, y=8, label="Women"),
    dict(x=2005, y=62, label="Men"),
    dict(x=2001, y=35, label="Women"),
]
# create the figure that will hold a single row of six charts
fig = plt.figure(figsize=(24, 4))
# add one subplot per degree, comparing male and female involvement
add_degrees_subplots(fig, stem_degrees, rows=1, columns=6, annotations=stem_annotations)
# show the plots
plt.show()
# the liberal arts degrees, in the order their charts appear
arts_degrees = [
    'Foreign Languages',
    'English',
    'Communications and Journalism',
    'Art and Performance',
    'Social Sciences and History',
]
# text labels: the first pair is drawn on the first chart, the second pair on the last chart
arts_annotations = [
    dict(x=2005, y=20, label="Men"),
    dict(x=2002, y=76, label="Women"),
    dict(x=2005, y=58, label="Men"),
    dict(x=2001, y=36, label="Women"),
]
# create the figure that will hold a single row of five charts
fig = plt.figure(figsize=(24, 4))
# add one subplot per degree, comparing male and female involvement
add_degrees_subplots(fig, arts_degrees, rows=1, columns=5, annotations=arts_annotations)
# show the plots
plt.show()
# the degrees that are neither STEM nor liberal arts, in the order their charts appear
other_degrees = [
    'Health Professions',
    'Public Administration',
    'Education',
    'Agriculture',
    'Business',
    'Architecture',
]
# text labels: the first pair is drawn on the first chart, the second pair on the last chart
other_annotations = [
    dict(x=2005, y=6, label="Men"),
    dict(x=2002, y=90, label="Women"),
    dict(x=2005, y=64, label="Men"),
    dict(x=2001, y=32, label="Women"),
]
# create the figure that will hold a single row of six charts
fig = plt.figure(figsize=(24, 4))
# add one subplot per degree, comparing male and female involvement
add_degrees_subplots(fig, other_degrees, rows=1, columns=6, annotations=other_annotations)
# show the plots
plt.show()
def ax_callback(ax, degree):
    """Tidy a single chart of the combined figure.

    Every chart keeps only the 0 and 100 y-ticks and gains a faint horizontal
    line at 50. The x-axis tick labels are hidden unless ``degree`` is one of
    ``other_degrees``.
    """
    # keep just the extremes on the y-axis and mark the 50% level
    ax.set_yticks([0, 100])
    ax.axhline(50, c=SUPPORT_COLOR, alpha=0.3)
    # only charts for the "other" degrees keep their x-axis labels
    show_x_labels = degree in other_degrees
    if not show_x_labels:
        ax.tick_params(labelbottom=False)
# every degree in one list: STEM first, then liberal arts, then the rest
all_degrees = [*stem_degrees, *arts_degrees, *other_degrees]
# text labels: the first pair is drawn on the first chart, the second pair on the last chart
overview_annotations = [
    dict(x=2005, y=90, label="Men"),
    dict(x=2002, y=6, label="Women"),
    dict(x=2005, y=64, label="Men"),
    dict(x=2001, y=32, label="Women"),
]
# create a taller figure that will hold a 3 x 6 grid of charts
fig = plt.figure(figsize=(24, 10))
# add one subplot per degree, letting ax_callback customise each axis
add_degrees_subplots(
    fig,
    all_degrees,
    rows=3,
    columns=6,
    annotations=overview_annotations,
    ax_callback=ax_callback,
)
# show the plots
plt.show()