# Install yfinance into the notebook's environment (IPython shell escape).
! pip install yfinance
import yfinance as yf
# Download price history for the AAPL and IBM tickers starting 2009-01-01.
# NOTE(review): yfinance documents `end` as exclusive, so 2019-12-31 itself is
# presumably not part of the result -- confirm that is intended.
data = yf.download("AAPL IBM", start="2009-01-01", end="2019-12-31")
# Select the opening-price columns; as a bare expression this is only visible
# when it is the last line of a notebook cell.
data['Open']
%%bash
# Run Hadoop's start scripts from $HADOOP_HOME/sbin: HDFS first, then YARN.
$HADOOP_HOME/sbin/start-dfs.sh
$HADOOP_HOME/sbin/start-yarn.sh
from hdfs3 import HDFileSystem
# Connect to the HDFS instance listening on localhost:9000.
hdfs = HDFileSystem(host='localhost', port=9000)

# Write the opening-price columns as CSV (with a header row) into HDFS.
# NOTE(review): the MapReduce job later reads
# hdfs:///user/root/AAPL_IBM_open.csv; this relative path presumably resolves
# to that directory -- confirm.
with hdfs.open('AAPL_IBM_open.csv', 'wb') as csv_out:
    data['Open'].to_csv(csv_out, header=True)

# List '.' so the newly written file can be checked.
hdfs.ls('.')
%%file stock_analysis.py
from mrjob.job import MRJob
import re
import sys
class StockAnalysis(MRJob):
    """MapReduce job that finds each stock's highest Q4 opening price per year.

    Each input line is expected to be a CSV row ``date,apple_open,ibm_open``
    whose ``date`` begins with ``YYYY-MM``. The job is run against
    ``AAPL_IBM_open.csv``, so the third column holds IBM prices and its keys
    use the ``ibm`` prefix.
    """

    # Month fields (characters 5-6 of the date) belonging to the fourth quarter.
    _Q4_MONTHS = frozenset(('10', '11', '12'))

    def mapper(self, key, value):
        """Emit ``('<stock>_<year>', open_price)`` for fourth-quarter rows.

        Args:
            key: Unused.
            value: One raw text line of the input CSV.

        Yields:
            ``('apple_<year>', float)`` and ``('ibm_<year>', float)`` pairs
            for rows dated October through December. Rows that do not have
            exactly three fields are skipped, as are individual prices that
            are empty, non-numeric or NaN, so one bad record cannot abort
            the whole job.
        """
        fields = value.strip().split(',')
        if len(fields) != 3:
            # Blank or malformed line: nothing usable to emit.
            return
        date, apple_open, ibm_open = fields

        # The header row ("Date,...") has no month at [5:7] and is dropped here.
        if date[5:7] not in self._Q4_MONTHS:
            return

        year = date[:4]
        for label, raw_price in (('apple', apple_open), ('ibm', ibm_open)):
            try:
                price = float(raw_price)
            except ValueError:
                # Missing values are written to CSV as empty strings.
                continue
            if price != price:
                # NaN is the only float unequal to itself; it would make
                # max() in the reducer depend on value order.
                continue
            yield ('%s_%s' % (label, year), price)

    def reducer(self, key, values):
        """Emit the maximum opening price seen for ``key``.

        Args:
            key: A ``'<stock>_<year>'`` key produced by :meth:`mapper`.
            values: Iterable of the opening prices emitted for that key.

        Yields:
            A single ``(key, max_price)`` pair.
        """
        yield (key, max(values))
# Script entry point: only start the job when executed directly, not on import.
if __name__ == '__main__':
    StockAnalysis.run()
# Run the job through mrjob's Hadoop runner (-r hadoop) on the CSV stored in HDFS.
!python stock_analysis.py -r hadoop hdfs:///user/root/AAPL_IBM_open.csv
%%bash
# Run Hadoop's stop scripts: YARN first, then HDFS (reverse of the start order).
$HADOOP_HOME/sbin/stop-yarn.sh
$HADOOP_HOME/sbin/stop-dfs.sh