import pandas as pd
# Load the jams dataset.  ``low_memory=False`` makes pandas infer each
# column's dtype from the whole file instead of chunk by chunk, which avoids
# the mixed-type DtypeWarning pandas reports for column 7.
df = pd.read_csv('jams.csv', low_memory=False)

# One entry per distinct user_id found in the dataset.
user = pd.Series(df['user_id'].unique())
user
# /shared-libs/python3.7/py-core/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3170: DtypeWarning: Columns (7) have mixed types. Specify dtype option on import or set low_memory=False.
#   interactivity=interactivity, compiler=compiler, result=result)
# Draw a random sample of up to 1000 distinct users.  The size is capped at
# the number of available users so a small dataset does not make ``sample``
# raise ValueError.
# NOTE(review): ``id`` shadows the builtin of the same name; the name is kept
# because code outside this view may still read it.
id = user.sample(min(1000, len(user))).tolist()

# Keep only the rows that belong to the sampled users.  The explicit
# ``.copy()`` makes ``new_df`` an independent frame, so the column edits
# below do not trigger SettingWithCopyWarning.
new_df = df.loc[df['user_id'].isin(id)].copy()

# Drop the columns that are not used below.
new_df = new_df.drop(
    columns=['jam_id', 'creation_date', 'link', 'spotify_uri', 'Unnamed: 7']
)

# Merge title and artist into a single "<title> by <artist>" column, then
# drop the two source columns.
new_df['song'] = new_df['title'] + ' by ' + new_df['artist']
new_df = new_df.drop(columns=['title', 'artist'])
new_df
# /shared-libs/python3.7/py-core/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning:
# A value is trying to be set on a copy of a slice from a DataFrame.
# Try using .loc[row_indexer,col_indexer] = value instead
# See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
#   """Entry point for launching an IPython kernel.