import numpy as np
import pandas as pd
# Load & preview 2019 test scores
scores_2019_all = pd.read_csv('SBA_2018-19.csv')
scores_2019_all.head()
# Load & preview 2022 test scores
scores_2022_all = pd.read_csv('SBA_2021-22.csv')
scores_2022_all.head()
print("(Rows, columns) of 2019 data:", scores_2019_all.shape)
print("(Rows, columns) of 2022 data:", scores_2022_all.shape)
# View basic information on the 2019 test scores
scores_2022_all.info()
# View 2019 summary information
scores_2019_all.describe()
# 2018-2019 SY
# Select columns for analysis
scores_2019 = scores_2019_all[['County', 'TestAdministration', 'TestSubject', 'GradeLevel', 'DistrictName',
'SchoolName', 'StudentGroupType', 'PercentMetTestedOnly',
'Count of Students Expected to Test', 'CountMetStandard']]
# Select rows where TestAdministration is SBAC and GradeLevel is 10
scores_2019 = scores_2019.query(' TestAdministration=="SBAC" & GradeLevel=="10" ')
# Add column to calculate PercentMet
scores_2019['ManualPercentMet'] = scores_2019['CountMetStandard'] / scores_2019['Count of Students Expected to Test']
scores_2019.head(10)
# 2021-2022 SY
# Select columns for analysis
scores_2022 = scores_2022_all[['County', 'TestAdministration', 'TestSubject', 'GradeLevel', 'DistrictName',
'SchoolName', 'StudentGroupType', 'PercentMetTestedOnly',
'Count of Students Expected to Test', 'CountMetStandard']]
# Select rows where TestAdministration is SBAC and GradeLevel is 10
scores_2022 = scores_2022.query(' TestAdministration=="SBAC" & GradeLevel=="10" ')
# Add column to calculate PercentMet
scores_2022['ManualPercentMet'] = scores_2022['CountMetStandard'] / scores_2022['Count of Students Expected to Test']
scores_2022.head(10)
# 18-19 SY
# Select rows where SchoolName is "District Total" and StudentGroupType is "All"
scores_2019_summary = scores_2019.query(' SchoolName=="District Total" & StudentGroupType=="All" ')
# Multiply values in PercentMetTestedOnly by 100
scores_2019_summary['PercentMetTestedOnly'] *= 100
# Drop unnecessary columns after filtering
scores_2019_summary.drop(columns=['TestAdministration', 'GradeLevel', 'SchoolName',
'StudentGroupType', 'Count of Students Expected to Test',
'ManualPercentMet'], inplace=True)
# Rename columns
scores_2019_summary.rename(columns={'PercentMetTestedOnly' : '2019PercentMet',
'CountMetStandard' : '2019CountMet'}, inplace=True)
# View results
scores_2019_summary.sort_values(by=['County', 'DistrictName', 'TestSubject']).head(15)
# 21-22 SY
# Select rows where SchoolName is "District Total" and StudentGroupType is "All"
scores_2022_summary = scores_2022.query(' SchoolName=="District Total" & StudentGroupType=="All" ')
# Multiply values in PercentMetTestedOnly by 100
scores_2022_summary['PercentMetTestedOnly'] *= 100
# Drop unnecessary columns after filtering
scores_2022_summary.drop(columns=['TestAdministration', 'GradeLevel', 'SchoolName',
'StudentGroupType', 'Count of Students Expected to Test',
'ManualPercentMet'], inplace=True)
# Rename the last column
scores_2022_summary.rename(columns={'PercentMetTestedOnly' : '2022PercentMet',
'CountMetStandard' : '2022CountMet'}, inplace=True)
# View results
scores_2022_summary.sort_values(by=['County', 'DistrictName', 'TestSubject']).head(15)
avg_2019 = scores_2019_summary.groupby(['County', 'TestSubject']).agg({'2019PercentMet':'mean',
'2019CountMet':'sum'}).round(2).reset_index()
avg_2019.head(10)
avg_2022 = scores_2022_summary.groupby(['County', 'TestSubject']).agg({'2022PercentMet':'mean',
'2022CountMet':'sum'}).round(2).reset_index()
avg_2022.head(10)
# Load fips
fips = pd.read_csv('fips_codes.csv')
# Keep only the county name & its 5-digit code
fips = fips[['COUNTY_NAME', 'COUNTYFPL']]
# Rename columns
fips.rename(columns = {'COUNTY_NAME':'County', 'COUNTYFPL':'FIPS'}, inplace=True)
# Add county codes to avg_2019
avg_2019 = avg_2019.merge(fips, how='left', on=['County'])
avg_2019.head(10)
# Add county codes to avg_2022
avg_2022 = avg_2022.merge(fips, how='left', on=['County'])
avg_2022.head(10)
# Combine avg_2019 & avg_2022 to one dataframe
avg_combined = avg_2019.merge(avg_2022, how='inner', on=['FIPS', 'County', 'TestSubject'])
avg_combined.insert(0, 'FIPS', avg_combined.pop('FIPS')) # move FIPS column to front
# Add column: difference in met standard percentage between the school years
avg_combined['PercentChange'] = avg_combined['2022PercentMet'] - avg_combined['2019PercentMet']
avg_combined.sort_values(by=['County', 'TestSubject']).head(10)
scores_2019_summary[scores_2019_summary['County']=='King']
scores_2022_summary[scores_2022_summary['County']=='King']
avg_combined[avg_combined['County'] == 'King']
import plotly
import plotly.express as px
# Load the GeoJSON file containing county geometries
from urllib.request import urlopen
import json
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
counties = json.load(response)
#counties["features"][0]
# Map 2019 ELA data
fig_ela_2019 = px.choropleth(
avg_2019[avg_2019['TestSubject']=='ELA'], # data to use
geojson = counties, # map to GeoJSON county list
locations = 'FIPS',
color = '2019PercentMet',
projection = "mercator",
hover_name = 'County',
# Data shown when hovered over
hover_data = {'FIPS':False, '2019CountMet' : ':,', '2019PercentMet': ':.1f%'},
title = '2019 SBA ELA: Percent of Students that Met Standard by County',
color_continuous_scale = "Greens",
#labels = {'2019PercentMet' : "Average % of Met"},
range_color = (0, 100))
# Update layout
fig_ela_2019.update_geos(fitbounds="locations", visible=False)
fig_ela_2019.update_layout(width=850, height=600)
fig_ela_2019.show()
# Map 2022 ELA data
fig_ela_2022 = px.choropleth(
avg_2022[avg_2022['TestSubject']=='ELA'], # data to use
geojson = counties, # map to GeoJSON county list
locations = 'FIPS',
color = '2022PercentMet',
projection = "mercator",
hover_name = 'County',
hover_data = {'FIPS':False, '2022CountMet' : ':,'}, # data shown when hovered over
title = '2022 SBA ELA: Percent of Students that Met Standard by County',
color_continuous_scale = "Greens",
labels = {'2022PercentMet' : "Average % of Met"},
range_color = (0, 100))
# Update layout
fig_ela_2022.update_geos(fitbounds="locations", visible=False)
fig_ela_2022.update_layout(width=850, height=600)
fig_ela_2022.show()
# Map 2019 Math data
fig_math_2019 = px.choropleth(avg_2019[avg_2019['TestSubject']=='Math'], # data to use
geojson = counties,
locations = 'FIPS',
color = '2019PercentMet',
projection = "mercator",
hover_name = 'County',
hover_data = {'FIPS':False, '2019CountMet' : ':,'},
title = '2019 SBA Math: Met Standard Percent by County',
color_continuous_scale = "Greens",
#labels = {'2019 % Met Math' : "Average % of Met"},
range_color = (0, 100))
# Update layout
fig_math_2019.update_geos(fitbounds="locations", visible=False)
fig_math_2019.update_layout(width=850, height=600)
fig_math_2019.show()
# Map 2022 Math data
fig_math_2022 = px.choropleth(avg_2022[avg_2022['TestSubject']=='Math'], # data to use
geojson = counties,
locations = 'FIPS',
color = '2022PercentMet',
projection = "mercator",
hover_name = 'County',
hover_data = {'FIPS':False, '2022CountMet' : ':,'},
title = '2022 SBA Math: Met Standard Percent by County',
color_continuous_scale = "Greens",
labels = {'2022 % Met Math' : "Average % of Met"},
range_color = (0, 100))
# Update layout
fig_math_2022.update_geos(fitbounds="locations", visible=False)
fig_math_2022.update_layout(width=850, height=600)
fig_math_2022.show()
# Map ELA delta
fig_ela_change = px.choropleth(avg_combined[avg_combined['TestSubject']=='ELA'],
geojson = counties, # map to GeoJSON county list
locations = 'FIPS',
color = 'PercentChange',
projection = "mercator",
hover_name = 'County',
hover_data = {'FIPS':False},
title = 'Change in ELA',
color_continuous_scale = px.colors.diverging.RdYlGn,
color_continuous_midpoint = 0,
#labels = {'ELA Change' : "19-22 Change"},
range_color = (-25, 25))
# Update layout
fig_ela_change.update_geos(fitbounds="locations", visible=False)
fig_ela_change.update_layout(width=850, height=600)
fig_ela_change.show()
# Map Math delta
fig_math_change = px.choropleth(avg_combined[avg_combined['TestSubject']=='Math'],
geojson = counties, # map to GeoJSON county list
locations = 'FIPS',
color = 'PercentChange',
projection = "mercator",
hover_name = 'County',
hover_data = {'FIPS':False},
title = 'Change in Math',
color_continuous_scale = px.colors.diverging.RdYlGn,
color_continuous_midpoint = 0,
#labels = {'Math Change' : "19-22 Change"},
range_color = (-25, 25))
# Update layout
fig_math_change.update_geos(fitbounds="locations", visible=False)
fig_math_change.update_layout(width=850, height=600)
fig_math_change.show()