Most data science students only learn to import CSV, but at work, you have to deal with multiple data formats, and things can get complicated if you are doing it for the first time. In this guide, we will be focusing on importing CSV, Excel, SQL, HTML, and JSON datasets.
SQL
# !kaggle datasets download -d anth7310/mental-health-in-the-tech-industry
# !unzip mental-health-in-the-tech-industry
import pandas as pd
import sqlite3
# Prepare a connection object
# Pass the Database name as a parameter
conn = sqlite3.connect("mental_health.sqlite")
# Use read_sql_query method
# Pass SELECT query and connection object as parameter
pdSql = pd.read_sql_query("SELECT * FROM Question", conn)
pdSql.head()
HTML
df_html = pd.read_html(
"https://www.pharmaceutical-technology.com/covid-19-vaccination-tracker/"
)[0]
df_html.head()
# !pip install kaggle
# !kaggle datasets download -d yasserh/bike-sharing-dataset
# !unzip bike-sharing-dataset
CSV
data_csv = pd.read_csv("day.csv")
data_csv.head()
EXCEL
# !kaggle datasets download -d yamqwe/u-s-presidents-and-debte
# !unzip u-s-presidents-and-debte
data_excel = pd.read_excel("US_Presidents.xlsx",sheet_name = 1, index_col = "index")
data_excel.head()
JSON
# !pip install kaggle
# !kaggle datasets download -d bricevergnou/spotify-recommendation
# !unzip spotify-recommendation
df_json = pd.read_json("good.json",orient='records')
df_json
import json
with open('good.json') as data_file:
data = json.load(data_file)
df = pd.json_normalize(data["audio_features"])
df.head()