# start by loading in our typical set of packages
import numpy as np # NumPy - array math, np.where, np.nan
import pandas as pd # pandas - DataFrame handling
import seaborn as sns # seaborn - statistical plotting (imported for later use)
import matplotlib.pyplot as plt # matplotlib - figures and histograms
import sys
# print the interpreter version so the notebook records which Python ran it
print(sys.version)
3.7.12 (default, Oct 12 2021, 03:36:26)
[GCC 8.3.0]
# use pip to install the package 'xlrd'
# pandas needs it as the engine to read the legacy .xls Excel file below
# NOTE: the leading '!' is Jupyter/IPython shell-escape syntax, not plain Python;
# this line only works inside a notebook cell.
!pip install xlrd
Collecting xlrd
Downloading xlrd-2.0.1-py2.py3-none-any.whl (96 kB)
|████████████████████████████████| 96 kB 5.6 MB/s
Installing collected packages: xlrd
Successfully installed xlrd-2.0.1
WARNING: You are using pip version 20.1.1; however, version 21.3.1 is available.
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
# Input file: a Capital IQ-style screening report in legacy .xls format.
fnamein = 'Company Screening Report 0yr Lag.xls'
# Read the 'Screening' sheet with pandas. The first 8 rows are report boilerplate
# (hence skiprows=8), there is no usable header row (header=None), and the vendor
# marks missing/not-meaningful values as '-' or 'NM' (hence na_values).
df_mult0 = pd.read_excel(fnamein, sheet_name='Screening', skiprows=8,
                         header=None, na_values=['-', 'NM'])
# Assign our own short column names in place of the long ones in the Excel file.
i = ['name', 'ex:tick', 'comp_type', 'exchange', 'industry', 'sector',
     'marketcap', 'total_debt', 'shrout', 'revenue', 'gross_profit', 'sga',
     'advertising', 'marketing', 'sales_marketing', 'gen_admin', 'rd_expense',
     'int_expense', 'op_lease_int', 'ebitda', 'ebit', 'net_income', 'cash',
     'capex', 'acquisitions', 'changeNWC', 'beta1', 'beta2', 'beta5', 'etr',
     'tev', 'gRev1yr', 'gRev3yr', 'ebit_margin', 'ebitda_margin',
     'gross_margin', 'roe', 'roc', 'altman']
df_mult0.columns = i
# The exchange:ticker string becomes our unique row identifier, so rows without
# one are useless - drop them first, then promote the column to the index.
# verify_integrity=True raises if any ticker appears twice.
df_mult0 = df_mult0.dropna(axis=0, how='any', subset=['ex:tick'])
df_mult0 = df_mult0.set_index('ex:tick', verify_integrity=True)
print(df_mult0.info())
df_mult0.describe()
<class 'pandas.core.frame.DataFrame'>
Index: 5294 entries, NasdaqGS:FLWS to NasdaqGS:ZNGA
Data columns (total 38 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 name 5294 non-null object
1 comp_type 5294 non-null object
2 exchange 4904 non-null object
3 industry 5294 non-null object
4 sector 5294 non-null object
5 marketcap 5282 non-null float64
6 total_debt 4998 non-null float64
7 shrout 5282 non-null float64
8 revenue 5156 non-null float64
9 gross_profit 5160 non-null float64
10 sga 5160 non-null float64
11 advertising 5160 non-null float64
12 marketing 5160 non-null float64
13 sales_marketing 5160 non-null float64
14 gen_admin 5160 non-null float64
15 rd_expense 5160 non-null float64
16 int_expense 5160 non-null float64
17 op_lease_int 5160 non-null float64
18 ebitda 5156 non-null float64
19 ebit 5160 non-null float64
20 net_income 5160 non-null float64
21 cash 4920 non-null float64
22 capex 4917 non-null float64
23 acquisitions 4913 non-null float64
24 changeNWC 5046 non-null float64
25 beta1 4351 non-null float64
26 beta2 4351 non-null float64
27 beta5 4101 non-null float64
28 etr 3424 non-null float64
29 tev 4641 non-null float64
30 gRev1yr 4280 non-null float64
31 gRev3yr 3967 non-null float64
32 ebit_margin 3529 non-null float64
33 ebitda_margin 3510 non-null float64
34 gross_margin 3851 non-null float64
35 roe 4481 non-null float64
36 roc 4161 non-null float64
37 altman 3509 non-null float64
dtypes: float64(33), object(5)
memory usage: 1.6+ MB
None
# Summary stats for the two variables mentioned above.
df_mult0.loc[:, ['revenue', 'tev']].describe()
# Clean-up strategy - pick some (not all) of the following options:
# Option 1: set nonsensical values to NaN, keep the rest the same.
# A negative revenue or negative enterprise value makes no economic sense, so
# mask() replaces any value where the condition holds with NaN.
df_mult0.loc[:, 'revenue'] = df_mult0['revenue'].mask(df_mult0['revenue'] < 0)
df_mult0.loc[:, 'tev'] = df_mult0['tev'].mask(df_mult0['tev'] < 0)
# Option 2: fill the NaN values with something?
#df_mult0['revenue'].fillna(value='count',inplace=True)
#df_mult0['tev'].fillna(value='count',inplace=True)
# Option 3: drop the rows that still have NaN in either column.
df_mult0 = df_mult0.dropna(axis=0, how='any', subset=['revenue', 'tev'])
# Check whether the problem is solved.
df_mult0.loc[:, ['revenue', 'tev']].describe()
#delete possibly before submission
df_mult0.info()
<class 'pandas.core.frame.DataFrame'>
Index: 4579 entries, NasdaqGS:FLWS to NasdaqGS:ZNGA
Data columns (total 38 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 name 4579 non-null object
1 comp_type 4579 non-null object
2 exchange 4231 non-null object
3 industry 4579 non-null object
4 sector 4579 non-null object
5 marketcap 4579 non-null float64
6 total_debt 4418 non-null float64
7 shrout 4579 non-null float64
8 revenue 4579 non-null float64
9 gross_profit 4579 non-null float64
10 sga 4579 non-null float64
11 advertising 4579 non-null float64
12 marketing 4579 non-null float64
13 sales_marketing 4579 non-null float64
14 gen_admin 4579 non-null float64
15 rd_expense 4579 non-null float64
16 int_expense 4579 non-null float64
17 op_lease_int 4579 non-null float64
18 ebitda 4579 non-null float64
19 ebit 4579 non-null float64
20 net_income 4579 non-null float64
21 cash 4347 non-null float64
22 capex 4381 non-null float64
23 acquisitions 4377 non-null float64
24 changeNWC 4501 non-null float64
25 beta1 3807 non-null float64
26 beta2 3807 non-null float64
27 beta5 3573 non-null float64
28 etr 2899 non-null float64
29 tev 4579 non-null float64
30 gRev1yr 3760 non-null float64
31 gRev3yr 3453 non-null float64
32 ebit_margin 3509 non-null float64
33 ebitda_margin 3490 non-null float64
34 gross_margin 3748 non-null float64
35 roe 3929 non-null float64
36 roc 4124 non-null float64
37 altman 3488 non-null float64
dtypes: float64(33), object(5)
memory usage: 1.4+ MB
# let's look at the dataframe, but only where the 'exchange' variable is NaN
df_mult0[df_mult0.exchange.isna()]
# this looks a lot like they are just international firms. We should keep these.
# Let's fill them with a value that may help us sort later.
# Also, later functions won't like NaN in our data.
# NOTE: df['col'].fillna(..., inplace=True) is the deprecated chained-assignment
# pattern (FutureWarning in pandas 2.x, an error under Copy-on-Write), so we
# assign the filled Series back to the column instead.
df_mult0['exchange'] = df_mult0['exchange'].fillna('NonUS')
# all this does is replace the NaN above with the text NonUS for Non-US Exchanges.
# check how many NaN are left in the data frame (info() shows non-null counts)
df_mult0.info()
<class 'pandas.core.frame.DataFrame'>
Index: 4579 entries, NasdaqGS:FLWS to NasdaqGS:ZNGA
Data columns (total 38 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 name 4579 non-null object
1 comp_type 4579 non-null object
2 exchange 4579 non-null object
3 industry 4579 non-null object
4 sector 4579 non-null object
5 marketcap 4579 non-null float64
6 total_debt 4418 non-null float64
7 shrout 4579 non-null float64
8 revenue 4579 non-null float64
9 gross_profit 4579 non-null float64
10 sga 4579 non-null float64
11 advertising 4579 non-null float64
12 marketing 4579 non-null float64
13 sales_marketing 4579 non-null float64
14 gen_admin 4579 non-null float64
15 rd_expense 4579 non-null float64
16 int_expense 4579 non-null float64
17 op_lease_int 4579 non-null float64
18 ebitda 4579 non-null float64
19 ebit 4579 non-null float64
20 net_income 4579 non-null float64
21 cash 4347 non-null float64
22 capex 4381 non-null float64
23 acquisitions 4377 non-null float64
24 changeNWC 4501 non-null float64
25 beta1 3807 non-null float64
26 beta2 3807 non-null float64
27 beta5 3573 non-null float64
28 etr 2899 non-null float64
29 tev 4579 non-null float64
30 gRev1yr 3760 non-null float64
31 gRev3yr 3453 non-null float64
32 ebit_margin 3509 non-null float64
33 ebitda_margin 3490 non-null float64
34 gross_margin 3748 non-null float64
35 roe 3929 non-null float64
36 roc 4124 non-null float64
37 altman 3488 non-null float64
dtypes: float64(33), object(5)
memory usage: 1.4+ MB
# working from the top, the next one with missing values is Market Cap. (depending on what you did exactly - it may be ok)
# if the firm is missing MarketCap, then that means we don't have stock price or shares outstanding.
# Either is a problem, so we have to drop these firms.
df_mult0.dropna(axis=0, how='any', subset=['marketcap'], inplace=True)
# next we see total_debt.
# what does it mean if total_debt is missing? It likely means that the firm just has no debt.
# filter the Data Frame by only looking at firms where total_debt is NaN.
df_mult0.loc[df_mult0.total_debt.isna()]
# Maybe we need to look by sector. Where are these firms with missing total_debt?
# Count the NaN in total_debt by sector: same filter as above, then groupby('sector').
df_mult0.loc[df_mult0.total_debt.isna()].groupby(['sector']).count()
# nifty trick - combine two conditional statements: which firms have NaN total_debt
# and are NOT in the 'Financials' sector
# (the parentheses around each conditional statement matter for operator precedence)
df_mult0.loc[(df_mult0.total_debt.isna()) & (df_mult0.sector != 'Financials')]
# since we feel comfortable that these are normal companies that just have 0 total_debt,
# fill the NaN with 0.
# NOTE: df['col'].fillna(..., inplace=True) is the deprecated chained-assignment
# pattern (FutureWarning in pandas 2.x), so assign the result back to the column.
df_mult0['total_debt'] = df_mult0['total_debt'].fillna(0)
df_mult0.info()
<class 'pandas.core.frame.DataFrame'>
Index: 4579 entries, NasdaqGS:FLWS to NasdaqGS:ZNGA
Data columns (total 38 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 name 4579 non-null object
1 comp_type 4579 non-null object
2 exchange 4579 non-null object
3 industry 4579 non-null object
4 sector 4579 non-null object
5 marketcap 4579 non-null float64
6 total_debt 4579 non-null float64
7 shrout 4579 non-null float64
8 revenue 4579 non-null float64
9 gross_profit 4579 non-null float64
10 sga 4579 non-null float64
11 advertising 4579 non-null float64
12 marketing 4579 non-null float64
13 sales_marketing 4579 non-null float64
14 gen_admin 4579 non-null float64
15 rd_expense 4579 non-null float64
16 int_expense 4579 non-null float64
17 op_lease_int 4579 non-null float64
18 ebitda 4579 non-null float64
19 ebit 4579 non-null float64
20 net_income 4579 non-null float64
21 cash 4347 non-null float64
22 capex 4381 non-null float64
23 acquisitions 4377 non-null float64
24 changeNWC 4501 non-null float64
25 beta1 3807 non-null float64
26 beta2 3807 non-null float64
27 beta5 3573 non-null float64
28 etr 2899 non-null float64
29 tev 4579 non-null float64
30 gRev1yr 3760 non-null float64
31 gRev3yr 3453 non-null float64
32 ebit_margin 3509 non-null float64
33 ebitda_margin 3490 non-null float64
34 gross_margin 3748 non-null float64
35 roe 3929 non-null float64
36 roc 4124 non-null float64
37 altman 3488 non-null float64
dtypes: float64(33), object(5)
memory usage: 1.4+ MB
# here is another trick, though you may not need it.
# what if a firm has EBIT but not EBITDA? We want to keep the firm, so we fill the missing
# EBITDA field with the EBIT, assuming that D&A is 0 rather than unknown (NaN).
#df_mult0['ebitda'].fillna(value=df_mult0['ebit'], inplace=True)
You are now a lot of the way there. The goal is to do this kind of analysis all the way down for all of your variables, finding why there is any odd data - negatives, NaN, zeros, etc. You can use filters, look at the data, summary stats, etc.
# lets put all of our conditions into a single index object, then use that to filter our dataframe
# do an internet search for "pandas dataframe slice by multiple conditions" if you want more info on what I'm doing here
# each of these creates a boolean Series using specific criteria.
idx_margin = (df_mult0.ebit_margin > 10) & (df_mult0.ebit_margin < 20)
idx_roc = (df_mult0.roc > 10) & (df_mult0.roc < 20)
idx_growth = (df_mult0.gRev1yr > 0) & (df_mult0.gRev1yr < 20)
idx_beta = (df_mult0.beta2 > 0.8) & (df_mult0.beta2 < 1.2)
# these are all boolean conditions, so we just use AND to combine them
idx = idx_margin & idx_roc & idx_growth & idx_beta
# now we pass idx for the row portion, and the list of columns we want to see
df_mult0.loc[idx, ['ebit_margin', 'roc', 'gRev1yr', 'beta2']].describe()
# compute the EV/EBITDA multiple.
# Guard the division with np.where() so a zero denominator yields NaN
# instead of +/-inf polluting the summary stats below.
df_mult0['ev_ebitda'] = np.where(df_mult0['ebitda'] != 0,
                                 df_mult0['tev'] / df_mult0['ebitda'],
                                 np.nan)
# check out your multiple
df_mult0[idx]['ev_ebitda'].describe()
#print(df_mult0[idx]['ev_ebitda']) #to see individual multiples
df_mult0[idx].sort_values(by='ev_ebitda')
# histogram of the multiple for the screened firms
#df_mult0[idx]['ev_ebitda'].hist()
#hist=df_mult0.hist(bins=5)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# color='g' sets the bar color directly - no need to loop over the patches
ax.hist(df_mult0[idx]['ev_ebitda'], bins=10, range=(0, 100),
        histtype='bar', color='g')
plt.title('EV/EBITDA Frequency Plot')
plt.xlabel('EV/EBITDA')
plt.ylabel('Frequency')
# second screen: weaker firms - low margins, low ROC, shrinking revenue
idx_margin = (df_mult0.ebit_margin > 0) & (df_mult0.ebit_margin < 10)
idx_roc = (df_mult0.roc > 0) & (df_mult0.roc < 10)
idx_growth = (df_mult0.gRev1yr < 0) & (df_mult0.gRev1yr > -20)
idx_beta = (df_mult0.beta2 > 0.8) & (df_mult0.beta2 < 1.2)
# these are all boolean conditions, so we just use AND to combine them
idx = idx_margin & idx_roc & idx_growth & idx_beta
# now we pass idx for the row portion, and the list of columns we want to see
df_mult0.loc[idx, ['ebit_margin', 'roc', 'gRev1yr', 'beta2']].describe()
# compute the EV/EBIT multiple.
# Guard the division with np.where() so a zero denominator yields NaN
# instead of +/-inf polluting the summary stats below.
df_mult0['ev_ebit'] = np.where(df_mult0['ebit'] != 0,
                               df_mult0['tev'] / df_mult0['ebit'],
                               np.nan)
# check out your multiple
df_mult0[idx]['ev_ebit'].describe()
#print(df_mult0[idx]['ev_ebit']) #to see individual multiples
df_mult0[idx].sort_values(by='ev_ebit')
# histogram of the multiple for the screened firms
#df_mult0[idx]['ev_ebit'].hist()
#hist=df_mult0.hist(bins=10)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# color='r' sets the bar color directly - no need to loop over the patches
ax.hist(df_mult0[idx]['ev_ebit'], bins=15, range=(0, 100),
        histtype='bar', color='r')
plt.title('EV/EBIT Frequency Plot')
plt.xlabel('EV/EBIT')
plt.ylabel('Frequency')
# We identified IntriCon Corporation (NasdaqGM:IIN) as the poorest performing company from our screen and
# are giving it a SELL recommendation. We came to this conclusion after observing its negative growth rates,
# negative return on equity, declining revenues, negative net income, high expenses, and extremely high multiples.
# Stats: ROE = -2.74, Revenue 1 year growth = -9.45, EV/EBIT multiple = 2,147, Net Income = -2.52
# We would recommend selling IntriCon Corp because we expect a significant decline in the next several years, as
# evidenced by the large negative 1-year growth rate of -9.45% and negative net income of -$2.52.
# third screen: stronger firms - high margins, high ROC, growing revenue
idx_margin = (df_mult0.ebit_margin > 20) & (df_mult0.ebit_margin < 30)
idx_roc = (df_mult0.roc > 20) & (df_mult0.roc < 30)
idx_growth = (df_mult0.gRev1yr > 0) & (df_mult0.gRev1yr < 30)
idx_beta = (df_mult0.beta2 > 0.8) & (df_mult0.beta2 < 1.2)
# these are all boolean conditions, so we just use AND to combine them
idx = idx_margin & idx_roc & idx_growth & idx_beta
# now we pass idx for the row portion, and the list of columns we want to see
df_mult0.loc[idx, ['ebit_margin', 'roc', 'gRev1yr', 'beta2']].describe()
# compute the EV/EBIT multiple.
# Guard the division with np.where() so a zero denominator yields NaN
# instead of +/-inf polluting the summary stats below.
df_mult0['ev_ebit'] = np.where(df_mult0['ebit'] != 0,
                               df_mult0['tev'] / df_mult0['ebit'],
                               np.nan)
# check out your multiple
df_mult0[idx]['ev_ebit'].describe()
#print(df_mult0[idx]['ev_ebit']) #to see individual multiples
df_mult0[idx].sort_values(by='ev_ebit')
# histogram of the multiple for the screened firms
#df_mult0[idx]['ev_ebit'].hist()
#hist=df_mult0.hist(bins=10)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# color='b' sets the bar color directly - no need to loop over the patches
ax.hist(df_mult0[idx]['ev_ebit'], bins=15, range=(0, 100),
        histtype='bar', color='b')
plt.title('EV/EBIT Frequency Plot')
plt.xlabel('EV/EBIT')
plt.ylabel('Frequency')
# We identified Apple as the best performing company from our screen
# and are giving it a BUY recommendation. We came to this conclusion after observing it's positive ROE, high revenue growth rate of roughly 10%
# and an established, mature EV/EBIT multiple of 28.35. While these numbers are not the highest amongst its peers
# we felt that Apple is an established company and will continue to see growth through its innovation and technology
# Stats: ROE = 82.1, Revenue 1 year growth = 9.88, EV/EBIT multiple = 28.35, Net Income = 63,930