# !pip install pandas lxml beautifulsoup4 html5lib matplotlib -U
Requirement already up-to-date: pandas in /opt/venv/lib/python3.7/site-packages (1.0.5)
Requirement already up-to-date: lxml in /opt/venv/lib/python3.7/site-packages (4.5.2)
Requirement already up-to-date: beautifulsoup4 in /opt/venv/lib/python3.7/site-packages (4.9.1)
Requirement already up-to-date: html5lib in /opt/venv/lib/python3.7/site-packages (1.1)
Requirement already up-to-date: matplotlib in /opt/venv/lib/python3.7/site-packages (3.3.0)
Requirement already satisfied, skipping upgrade: numpy>=1.13.3 in /opt/venv/lib/python3.7/site-packages (from pandas) (1.19.0)
Requirement already satisfied, skipping upgrade: python-dateutil>=2.6.1 in /opt/venv/lib/python3.7/site-packages (from pandas) (2.8.1)
Requirement already satisfied, skipping upgrade: pytz>=2017.2 in /opt/venv/lib/python3.7/site-packages (from pandas) (2020.1)
Requirement already satisfied, skipping upgrade: soupsieve>1.2 in /opt/venv/lib/python3.7/site-packages (from beautifulsoup4) (2.0.1)
Requirement already satisfied, skipping upgrade: webencodings in /opt/venv/lib/python3.7/site-packages (from html5lib) (0.5.1)
Requirement already satisfied, skipping upgrade: six>=1.9 in /opt/venv/lib/python3.7/site-packages (from html5lib) (1.15.0)
Requirement already satisfied, skipping upgrade: pillow>=6.2.0 in /opt/venv/lib/python3.7/site-packages (from matplotlib) (7.2.0)
Requirement already satisfied, skipping upgrade: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in /opt/venv/lib/python3.7/site-packages (from matplotlib) (2.4.7)
Requirement already satisfied, skipping upgrade: kiwisolver>=1.0.1 in /opt/venv/lib/python3.7/site-packages (from matplotlib) (1.2.0)
Requirement already satisfied, skipping upgrade: cycler>=0.10 in /opt/venv/lib/python3.7/site-packages (from matplotlib) (0.10.0)
import sys
import pandas as pd
# Record the interpreter and pandas versions this notebook was run with.
print("Python version", sys.version)
print("pandas version:", pd.__version__)
Python version 3.7.3 (default, Jun 11 2019, 01:11:15)
[GCC 6.3.0 20170516]
pandas version: 1.0.5
# --- US Women's National Team stats -----------------------------------------
# pd.read_html parses the HTML tables found at the URL into a list of
# DataFrames, one per table.
USWNT_STATS_URL = 'https://www.ussoccer.com/uswnt-stats'

# First pass: default parsing, then poke at what came back.
list_of_dfs = pd.read_html(USWNT_STATS_URL)
type(list_of_dfs)
len(list_of_dfs)
list_of_dfs[0]
list_of_dfs[1]

# Second pass: use the first row as the column header and the first column
# as the row index.
list_of_dfs_nicer = pd.read_html(USWNT_STATS_URL, header=0, index_col=0)
list_of_dfs_nicer[0].head(2)

# The first table is used for the field players; its last three rows are
# dropped (presumably summary/total rows -- verify against the live page).
runners_df = list_of_dfs_nicer[0].iloc[:-3]
runners_df.tail(2)

# The second table is used for the goalkeepers; its last two rows are
# dropped (same caveat as above).
goalies_df = list_of_dfs_nicer[1].iloc[:-2]
goalies_df

# Keep only players with at least one goal and chart them as horizontal
# bars, sorted ascending by goals. The trailing semicolon suppresses the
# notebook's echo of the returned Axes object.
goal_scorers_df = runners_df.loc[runners_df['G'] > 0]
goal_scorers_df['G'].sort_values().plot.barh(title='2020 USWNT Goals');
# --- Weight-training exercises (Wikipedia) ----------------------------------
# Parse the tables on the page, using the first column of each as the index.
weightlifting_df_list = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_weight_training_exercises',
    index_col=0,
)
len(weightlifting_df_list)

# The first table is the one used from here on.
exercises_df = weightlifting_df_list[0]
exercises_df

# Select the rows whose 'Ham-strings' column is marked 'Yes' or 'Some'.
hammies = exercises_df[exercises_df['Ham-strings'].isin(['Yes', 'Some'])]
hammies

# Descending string sort puts 'Yes' rows ahead of 'Some' rows.
hammies.sort_values(by='Ham-strings', ascending=False)