# Python - Exam Prep Course Efficacy Analysis

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Load the exam results. The path is relative, so it is resolved against the
# current working directory.
df = pd.read_csv('exams.csv')
df.head()

# Summary statistics for every column (include='all' keeps the non-numeric
# ones too), rounded to two decimals.
summary = df.describe(include='all')
summary.round(2)

# Column dtypes and non-null counts.
df.info()

# Number of missing values in each column.
df.isna().sum()

# Mean reading score for each test_prep_course group, as a one-column frame.
mean_reading = df.groupby('test_prep_course')['reading'].mean().round(4)
read_prep = mean_reading.to_frame('avg_reading')

# Fractional change of each group's mean relative to the previous row
# (the first row has no predecessor, so it is NaN).
read_prep['pct_change'] = read_prep['avg_reading'].pct_change().round(4)
read_prep

# Visualize the reading-score distribution for each test_prep_course group.
# NOTE(review): a box plot shows medians and quartiles rather than means,
# although the title below says "Average" -- confirm the intended wording.
fig = px.box(
    df,
    x='reading',
    y='test_prep_course',
    color='test_prep_course',
)

# Titles and axis labels. The legend is hidden because the y axis already
# names each group (colour and y use the same column).
box_layout = dict(
    title='Average reading scores by test prep course completion',
    xaxis_title='reading scores',
    yaxis_title='test preparation course',
    showlegend=False,
)
fig.update_layout(**box_layout)

# Show graph
fig.show()

# Mean math/reading/writing score per parent_education_level, ordered from the
# highest to the lowest mean writing score and rounded to two decimals.
# The columns axis is named 'subject' so it can be referenced by that name
# when plotting.
parent_avg = (
    df.groupby('parent_education_level')[['math', 'reading', 'writing']]
    .mean()
    .sort_values('writing', ascending=False)
    .round(2)
    .rename_axis(columns='subject')
)
parent_avg

def _strip_facet_prefix(annotation):
    """Keep only the text after the last '=' in a facet title."""
    return annotation.update(text=annotation.text.split("=")[-1])


# Visualize average subject scores by parental education level, with one
# facet per entry of the 'subject' columns axis.
fig = px.line(parent_avg, facet_col='subject', markers=True)

# Facet titles come out as "<name>=<value>"; show just the value.
fig.for_each_annotation(_strip_facet_prefix)

# Titles and axis labels
fig.update_layout(
    title='Average subject scores by parent education level',
    yaxis_title='average score',
    showlegend=False,
)
fig.update_xaxes(title_text='parent education level')

# Show graph
fig.show()

# Mean subject scores for every (parent_education_level, test_prep_course)
# pair. unstack() moves test_prep_course into the columns, giving a
# (subject, test_prep_course) column MultiIndex with one row per education
# level.
grouped_scores = df.groupby(['parent_education_level', 'test_prep_course'])
mean_by_group = grouped_scores[['math', 'reading', 'writing']].mean()
parent_edu = mean_by_group.unstack()

# Order the education levels from highest to lowest mean writing score among
# students who completed the course, then round for display.
parent_edu = parent_edu.sort_values(
    by=('writing', 'completed'),
    ascending=False,
).round(2)
parent_edu

# For each subject, report which parent education level gains the least and
# the most from completing the test prep course.
#
# Improvement is the percentage gain of the 'completed' mean over the 'none'
# (no course) mean: (completed - none) / none * 100. The previous
# pct_change(axis=1) * -100 formulation divided by the 'completed' mean
# instead, which understates the gain, and it relied on 'completed' sorting
# before 'none' in the columns.
subject_list = ['math', 'reading', 'writing']
for subject in subject_list:
    completed = parent_edu[subject, 'completed']
    baseline = parent_edu[subject, 'none']
    improvement = ((completed - baseline) / baseline * 100).round(2)

    # Index labels are the parent education levels.
    min_level = improvement.idxmin()
    max_level = improvement.idxmax()
    min_improve = improvement.min()
    max_improve = improvement.max()

    print(f'{subject}: {min_level} has lowest improvement {min_improve}%')
    print(f'{subject}: {max_level} has highest improvement {max_improve}%\n')

# Two side-by-side panels sharing the y axis: left for students who completed
# the test prep course, right for those who did not.
fig = make_subplots(
    rows=1,
    cols=2,
    horizontal_spacing=0.01,
    shared_yaxes=True,
    subplot_titles=('course completed', 'not completed'),
)

# (test_prep_course value, subplot column, legend group, extra trace kwargs)
# Only the 'completed' panel uses the thicker line.
panel_specs = (
    ('completed', 1, 'a', {'line': dict(width=3)}),
    ('none', 2, 'b', {}),
)

# One trace per subject in each panel, added in the order
# math, reading, writing.
for prep_status, panel_col, legend_group, trace_kwargs in panel_specs:
    for subject_name in ('math', 'reading', 'writing'):
        fig.add_trace(
            go.Scatter(
                x=parent_edu.index,
                y=parent_edu[subject_name, prep_status],
                name=subject_name,
                legendgroup=legend_group,
                **trace_kwargs,
            ),
            row=1,
            col=panel_col,
        )

# Titles and axis labels
fig.update_layout(
    legend_tracegroupgap=50,
    yaxis_title='average score',
    title_text='Average scores by category and test prep',
)
fig.update_xaxes(title_text='parent education level')

# Show graph
fig.show()

# Heatmap of pairwise correlations between the numeric columns.
# The frame also holds text columns (e.g. test_prep_course), and on
# pandas >= 2.0 DataFrame.corr() raises on non-numeric data instead of
# silently dropping it, so restrict to numeric columns explicitly.
numeric_scores = df.select_dtypes(include='number')
fig = px.imshow(numeric_scores.corr())
fig.show()