Python + SQL

import numpy as np import pandas as pd

# Load & preview 2019 test scores scores_2019_all = pd.read_csv('SBA_2018-19.csv') scores_2019_all.head()

# Load & preview 2022 test scores scores_2022_all = pd.read_csv('SBA_2021-22.csv') scores_2022_all.head()

print("(Rows, columns) of 2019 data:", scores_2019_all.shape) print("(Rows, columns) of 2022 data:", scores_2022_all.shape)

# View basic information on the 2019 test scores scores_2022_all.info()

# View 2019 summary information scores_2019_all.describe()

# 2018-2019 SY # Select columns for analysis scores_2019 = scores_2019_all[['County', 'TestAdministration', 'TestSubject', 'GradeLevel', 'DistrictName', 'SchoolName', 'StudentGroupType', 'PercentMetTestedOnly', 'Count of Students Expected to Test', 'CountMetStandard']] # Select rows where TestAdministration is SBAC and GradeLevel is 10 scores_2019 = scores_2019.query(' TestAdministration=="SBAC" & GradeLevel=="10" ') # Add column to calculate PercentMet scores_2019['ManualPercentMet'] = scores_2019['CountMetStandard'] / scores_2019['Count of Students Expected to Test'] scores_2019.head(10)

# 2021-2022 SY # Select columns for analysis scores_2022 = scores_2022_all[['County', 'TestAdministration', 'TestSubject', 'GradeLevel', 'DistrictName', 'SchoolName', 'StudentGroupType', 'PercentMetTestedOnly', 'Count of Students Expected to Test', 'CountMetStandard']] # Select rows where TestAdministration is SBAC and GradeLevel is 10 scores_2022 = scores_2022.query(' TestAdministration=="SBAC" & GradeLevel=="10" ') # Add column to calculate PercentMet scores_2022['ManualPercentMet'] = scores_2022['CountMetStandard'] / scores_2022['Count of Students Expected to Test'] scores_2022.head(10)

# 18-19 SY # Select rows where SchoolName is "District Total" and StudentGroupType is "All" scores_2019_summary = scores_2019.query(' SchoolName=="District Total" & StudentGroupType=="All" ') # Multiply values in PercentMetTestedOnly by 100 scores_2019_summary['PercentMetTestedOnly'] *= 100 # Drop unnecessary columns after filtering scores_2019_summary.drop(columns=['TestAdministration', 'GradeLevel', 'SchoolName', 'StudentGroupType', 'Count of Students Expected to Test', 'ManualPercentMet'], inplace=True) # Rename columns scores_2019_summary.rename(columns={'PercentMetTestedOnly' : '2019PercentMet', 'CountMetStandard' : '2019CountMet'}, inplace=True) # View results scores_2019_summary.sort_values(by=['County', 'DistrictName', 'TestSubject']).head(15)

# 21-22 SY # Select rows where SchoolName is "District Total" and StudentGroupType is "All" scores_2022_summary = scores_2022.query(' SchoolName=="District Total" & StudentGroupType=="All" ') # Multiply values in PercentMetTestedOnly by 100 scores_2022_summary['PercentMetTestedOnly'] *= 100 # Drop unnecessary columns after filtering scores_2022_summary.drop(columns=['TestAdministration', 'GradeLevel', 'SchoolName', 'StudentGroupType', 'Count of Students Expected to Test', 'ManualPercentMet'], inplace=True) # Rename the last column scores_2022_summary.rename(columns={'PercentMetTestedOnly' : '2022PercentMet', 'CountMetStandard' : '2022CountMet'}, inplace=True) # View results scores_2022_summary.sort_values(by=['County', 'DistrictName', 'TestSubject']).head(15)

avg_2019 = scores_2019_summary.groupby(['County', 'TestSubject']).agg({'2019PercentMet':'mean', '2019CountMet':'sum'}).round(2).reset_index() avg_2019.head(10)

avg_2022 = scores_2022_summary.groupby(['County', 'TestSubject']).agg({'2022PercentMet':'mean', '2022CountMet':'sum'}).round(2).reset_index() avg_2022.head(10)

# Load fips fips = pd.read_csv('fips_codes.csv') # Keep only the county name & its 5-digit code fips = fips[['COUNTY_NAME', 'COUNTYFPL']] # Rename columns fips.rename(columns = {'COUNTY_NAME':'County', 'COUNTYFPL':'FIPS'}, inplace=True)

# Add county codes to avg_2019 avg_2019 = avg_2019.merge(fips, how='left', on=['County']) avg_2019.head(10)

# Add county codes to avg_2022 avg_2022 = avg_2022.merge(fips, how='left', on=['County']) avg_2022.head(10)

# Combine avg_2019 & avg_2022 to one dataframe avg_combined = avg_2019.merge(avg_2022, how='inner', on=['FIPS', 'County', 'TestSubject']) avg_combined.insert(0, 'FIPS', avg_combined.pop('FIPS')) # move FIPS column to front # Add column: difference in met standard percentage between the school years avg_combined['PercentChange'] = avg_combined['2022PercentMet'] - avg_combined['2019PercentMet'] avg_combined.sort_values(by=['County', 'TestSubject']).head(10)

scores_2019_summary[scores_2019_summary['County']=='King']

scores_2022_summary[scores_2022_summary['County']=='King']

avg_combined[avg_combined['County'] == 'King']

import plotly import plotly.express as px # Load the GeoJSON file containing county geometries from urllib.request import urlopen import json with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response: counties = json.load(response) #counties["features"][0]

# Map 2019 ELA data fig_ela_2019 = px.choropleth( avg_2019[avg_2019['TestSubject']=='ELA'], # data to use geojson = counties, # map to GeoJSON county list locations = 'FIPS', color = '2019PercentMet', projection = "mercator", hover_name = 'County', # Data shown when hovered over hover_data = {'FIPS':False, '2019CountMet' : ':,', '2019PercentMet': ':.1f%'}, title = '2019 SBA ELA: Percent of Students that Met Standard by County', color_continuous_scale = "Greens", #labels = {'2019PercentMet' : "Average % of Met"}, range_color = (0, 100)) # Update layout fig_ela_2019.update_geos(fitbounds="locations", visible=False) fig_ela_2019.update_layout(width=850, height=600) fig_ela_2019.show()

# Map 2022 ELA data fig_ela_2022 = px.choropleth( avg_2022[avg_2022['TestSubject']=='ELA'], # data to use geojson = counties, # map to GeoJSON county list locations = 'FIPS', color = '2022PercentMet', projection = "mercator", hover_name = 'County', hover_data = {'FIPS':False, '2022CountMet' : ':,'}, # data shown when hovered over title = '2022 SBA ELA: Percent of Students that Met Standard by County', color_continuous_scale = "Greens", labels = {'2022PercentMet' : "Average % of Met"}, range_color = (0, 100)) # Update layout fig_ela_2022.update_geos(fitbounds="locations", visible=False) fig_ela_2022.update_layout(width=850, height=600) fig_ela_2022.show()

# Map 2019 Math data fig_math_2019 = px.choropleth(avg_2019[avg_2019['TestSubject']=='Math'], # data to use geojson = counties, locations = 'FIPS', color = '2019PercentMet', projection = "mercator", hover_name = 'County', hover_data = {'FIPS':False, '2019CountMet' : ':,'}, title = '2019 SBA Math: Met Standard Percent by County', color_continuous_scale = "Greens", #labels = {'2019 % Met Math' : "Average % of Met"}, range_color = (0, 100)) # Update layout fig_math_2019.update_geos(fitbounds="locations", visible=False) fig_math_2019.update_layout(width=850, height=600) fig_math_2019.show()

# Map 2022 Math data fig_math_2022 = px.choropleth(avg_2022[avg_2022['TestSubject']=='Math'], # data to use geojson = counties, locations = 'FIPS', color = '2022PercentMet', projection = "mercator", hover_name = 'County', hover_data = {'FIPS':False, '2022CountMet' : ':,'}, title = '2022 SBA Math: Met Standard Percent by County', color_continuous_scale = "Greens", labels = {'2022 % Met Math' : "Average % of Met"}, range_color = (0, 100)) # Update layout fig_math_2022.update_geos(fitbounds="locations", visible=False) fig_math_2022.update_layout(width=850, height=600) fig_math_2022.show()

# Map ELA delta fig_ela_change = px.choropleth(avg_combined[avg_combined['TestSubject']=='ELA'], geojson = counties, # map to GeoJSON county list locations = 'FIPS', color = 'PercentChange', projection = "mercator", hover_name = 'County', hover_data = {'FIPS':False}, title = 'Change in ELA', color_continuous_scale = px.colors.diverging.RdYlGn, color_continuous_midpoint = 0, #labels = {'ELA Change' : "19-22 Change"}, range_color = (-25, 25)) # Update layout fig_ela_change.update_geos(fitbounds="locations", visible=False) fig_ela_change.update_layout(width=850, height=600) fig_ela_change.show()

# Map Math delta fig_math_change = px.choropleth(avg_combined[avg_combined['TestSubject']=='Math'], geojson = counties, # map to GeoJSON county list locations = 'FIPS', color = 'PercentChange', projection = "mercator", hover_name = 'County', hover_data = {'FIPS':False}, title = 'Change in Math', color_continuous_scale = px.colors.diverging.RdYlGn, color_continuous_midpoint = 0, #labels = {'Math Change' : "19-22 Change"}, range_color = (-25, 25)) # Update layout fig_math_change.update_geos(fitbounds="locations", visible=False) fig_math_change.update_layout(width=850, height=600) fig_math_change.show()