# Read the whole article into `text`. The `with` block closes the file handle
# as soon as the read finishes, and the explicit encoding keeps decoding from
# depending on the platform's locale default.
with open(r"/work/Dogecoin.txt", "rt", encoding="utf-8") as f:
    text = f.read()
!pip install nltk
import nltk
nltk.download('punkt')
from nltk import sent_tokenize
Requirement already satisfied: nltk in /shared-libs/python3.7/py/lib/python3.7/site-packages (3.6.2)
Requirement already satisfied: joblib in /shared-libs/python3.7/py/lib/python3.7/site-packages (from nltk) (1.0.1)
Requirement already satisfied: click in /shared-libs/python3.7/py/lib/python3.7/site-packages (from nltk) (7.1.2)
Requirement already satisfied: regex in /shared-libs/python3.7/py/lib/python3.7/site-packages (from nltk) (2021.4.4)
Requirement already satisfied: tqdm in /shared-libs/python3.7/py/lib/python3.7/site-packages (from nltk) (4.60.0)
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data] Package punkt is already up-to-date!
# Segment the article into sentences (English is NLTK's default language,
# spelled out here), then show the first sentence as a quick sanity check.
sentences = sent_tokenize(text, language="english")
print(sentences[0])
Once again Elon Musk's tweet sent Dogecoin soaring to the moon as the meme currency surged 17 percent on Friday, erasing losses of the last seven sessions.
# word_tokenize must be imported explicitly: the earlier import only brings
# in sent_tokenize, so without this line the call below raises NameError on a
# fresh top-to-bottom run.
from nltk.tokenize import word_tokenize

# Tokenize the full article; punctuation marks come back as separate tokens.
tokens = word_tokenize(text)
print(tokens)
['Once', 'again', 'Elon', 'Musk', "'s", 'tweet', 'sent', 'Dogecoin', 'soaring', 'to', 'the', 'moon', 'as', 'the', 'meme', 'currency', 'surged', '17', 'percent', 'on', 'Friday', ',', 'erasing', 'losses', 'of', 'the', 'last', 'seven', 'sessions', '.', 'Musk', 'took', 'to', 'Twitter', 'to', 'announce', 'that', 'he', 'is', 'working', 'with', 'Doge', 'developers', 'to', 'improve', 'the', 'system', 'transaction', 'efficiency', '.', '``', 'Working', 'with', 'Doge', 'devs', 'to', 'improve', 'system', 'transaction', 'efficiency', '.', 'Potentially', 'promising', ',', "''", 'he', 'said', '.', 'At', 'the', 'time', 'of', 'writing', ',', 'Dogecoin', 'was', 'trading', 'nearly', '17.16', 'percent', 'higher', 'at', '$', '0.52', ',', 'according', 'to', 'data', 'from', 'CoinDesk', '.', 'However', ',', 'it', 'remained', 'unclear', 'whether', 'Musk', 'was', 'referring', 'to', 'Dogecoin', "'s", 'energy', 'efficiency', '--', 'an', 'issue', 'that', 'made', 'Tesla', 'stop', 'accepting', 'payments', 'in', 'Bitcoin', '.', 'Bitcoin', 'has', 'been', 'on', 'a', 'downward', 'trajectory', 'since', 'then', '.', 'Bitcoin', 'is', 'heading', 'to', 'its', 'worst', 'week', 'since', 'Friday', '.', 'At', 'the', 'time', 'of', 'writing', ',', 'it', 'was', 'trading', 'at', '$', '48,983', ',', 'nearly', '4', 'percent', 'down', 'in', 'the', 'last', '24', 'hours', '.', 'Musk', "'s", 'tweets', 'have', 'a', 'tendency', 'to', 'send', 'the', 'markets', 'surging', 'or', 'plummeting', ',', 'more', 'so', 'when', 'the', 'tweets', 'concern', 'cryptocurrency', '.', 'Earlier', 'when', 'Musk', 'announced', 'Tesla', 'accepting', 'payments', 'in', 'bitcoin', ',', 'the', 'coin', 'crossed', 'the', '$', '60,000', 'levels', '.', 'Now', 'that', 'he', 'has', 'withdrawn', 'the', 'decision', ',', 'the', 'coin', 'plunged', 'nearly', '17', 'percent', 'Wednesday', '.', 'While', 'the', 'coin', 'had', 'since', 'overcome', 'the', 'fall', 'and', 'rebounded', 'on', 'Thursday', 'and', 'was', 'trading', 'well', 'above', '$', '50,000-level', 
',', 'its', 'downward', 'journey', 'began', 'again', 'after', 'another', 'shock', '.', 'The', 'reports', 'on', 'a', 'regulatory', 'probe', 'into', 'crypto', 'exchange', 'Binance', 'sent', 'Bitcoin', 'spiralling', 'again', 'on', 'Friday', '.', 'Under', 'the', 'added', 'pressure', ',', 'Bitcoin', 'lost', '4', 'percent', 'and', 'it', 'is', 'currently', 'trading', 'below', 'its', 'psychological', 'support', 'line', 'of', '$', '50,000', '.', 'Blockchain', 'data', 'provider', 'Glassnode', 'on', 'Monday', 'said', 'there', 'are', 'indications', 'that', 'a', 'portion', 'of', 'bitcoin', "'s", 'capital', 'is', 'rotating', 'towards', 'ethereum', 'and', 'dogecoin', '.', 'In', 'the', 'past', 'seven', 'days', ',', 'while', 'Bitcoin', 'has', 'lost', '12.11', 'percent', ',', 'according', 'to', 'data', 'from', 'CoinMarketCap', ',', 'Ether', 'has', 'gained', 'nearly', '11', 'percent', 'during', 'the', 'same', 'period', '(', 'though', 'it', 'was', 'trading', 'in', 'red', 'on', 'Friday', ')', '.', 'At', 'the', 'time', 'of', 'writing', ',', 'the', 'coin', 'was', 'down', 'nearly', '5', 'percent', 'at', '$', '3,798', '.', 'Ethereum', 'has', 'soared', 'this', 'year', ',', 'majorly', 'ignited', 'by', 'the', 'boom', 'in', 'DeFi', '--', 'platforms', 'that', 'facilitate', 'crypto-denominated', 'lending', 'outside', 'traditional', 'banking', '.', 'Dogecoin', 'too', 'has', 'suffered', 'blows', 'from', 'Elon', 'Musk', 'when', 'he', 'referred', 'to', 'the', 'coin', 'as', 'a', '``', 'hustle', "''", '.', 'The', 'coin', 'had', 'dropped', '10', 'percent', 'sharply', 'after', 'the', 'tweet', '.']
# Naive baseline: cut the text on runs of whitespace only, so punctuation
# stays attached to the neighbouring word (e.g. "Friday,").
words = str.split(text)
print(words)
['Once', 'again', 'Elon', "Musk's", 'tweet', 'sent', 'Dogecoin', 'soaring', 'to', 'the', 'moon', 'as', 'the', 'meme', 'currency', 'surged', '17', 'percent', 'on', 'Friday,', 'erasing', 'losses', 'of', 'the', 'last', 'seven', 'sessions.', 'Musk', 'took', 'to', 'Twitter', 'to', 'announce', 'that', 'he', 'is', 'working', 'with', 'Doge', 'developers', 'to', 'improve', 'the', 'system', 'transaction', 'efficiency.', '"Working', 'with', 'Doge', 'devs', 'to', 'improve', 'system', 'transaction', 'efficiency.', 'Potentially', 'promising,"', 'he', 'said.', 'At', 'the', 'time', 'of', 'writing,', 'Dogecoin', 'was', 'trading', 'nearly', '17.16', 'percent', 'higher', 'at', '$0.52,', 'according', 'to', 'data', 'from', 'CoinDesk.', 'However,', 'it', 'remained', 'unclear', 'whether', 'Musk', 'was', 'referring', 'to', "Dogecoin's", 'energy', 'efficiency', '--', 'an', 'issue', 'that', 'made', 'Tesla', 'stop', 'accepting', 'payments', 'in', 'Bitcoin.', 'Bitcoin', 'has', 'been', 'on', 'a', 'downward', 'trajectory', 'since', 'then.', 'Bitcoin', 'is', 'heading', 'to', 'its', 'worst', 'week', 'since', 'Friday.', 'At', 'the', 'time', 'of', 'writing,', 'it', 'was', 'trading', 'at', '$48,983,', 'nearly', '4', 'percent', 'down', 'in', 'the', 'last', '24', 'hours.', "Musk's", 'tweets', 'have', 'a', 'tendency', 'to', 'send', 'the', 'markets', 'surging', 'or', 'plummeting,', 'more', 'so', 'when', 'the', 'tweets', 'concern', 'cryptocurrency.', 'Earlier', 'when', 'Musk', 'announced', 'Tesla', 'accepting', 'payments', 'in', 'bitcoin,', 'the', 'coin', 'crossed', 'the', '$60,000', 'levels.', 'Now', 'that', 'he', 'has', 'withdrawn', 'the', 'decision,', 'the', 'coin', 'plunged', 'nearly', '17', 'percent', 'Wednesday.', 'While', 'the', 'coin', 'had', 'since', 'overcome', 'the', 'fall', 'and', 'rebounded', 'on', 'Thursday', 'and', 'was', 'trading', 'well', 'above', '$50,000-level,', 'its', 'downward', 'journey', 'began', 'again', 'after', 'another', 'shock.', 'The', 'reports', 'on', 'a', 'regulatory', 
'probe', 'into', 'crypto', 'exchange', 'Binance', 'sent', 'Bitcoin', 'spiralling', 'again', 'on', 'Friday.', 'Under', 'the', 'added', 'pressure,', 'Bitcoin', 'lost', '4', 'percent', 'and', 'it', 'is', 'currently', 'trading', 'below', 'its', 'psychological', 'support', 'line', 'of', '$50,000.', 'Blockchain', 'data', 'provider', 'Glassnode', 'on', 'Monday', 'said', 'there', 'are', 'indications', 'that', 'a', 'portion', 'of', "bitcoin's", 'capital', 'is', 'rotating', 'towards', 'ethereum', 'and', 'dogecoin.', 'In', 'the', 'past', 'seven', 'days,', 'while', 'Bitcoin', 'has', 'lost', '12.11', 'percent,', 'according', 'to', 'data', 'from', 'CoinMarketCap,', 'Ether', 'has', 'gained', 'nearly', '11', 'percent', 'during', 'the', 'same', 'period', '(though', 'it', 'was', 'trading', 'in', 'red', 'on', 'Friday).', 'At', 'the', 'time', 'of', 'writing,', 'the', 'coin', 'was', 'down', 'nearly', '5', 'percent', 'at', '$3,798.', 'Ethereum', 'has', 'soared', 'this', 'year,', 'majorly', 'ignited', 'by', 'the', 'boom', 'in', 'DeFi', '--', 'platforms', 'that', 'facilitate', 'crypto-denominated', 'lending', 'outside', 'traditional', 'banking.', 'Dogecoin', 'too', 'has', 'suffered', 'blows', 'from', 'Elon', 'Musk', 'when', 'he', 'referred', 'to', 'the', 'coin', 'as', 'a', '"hustle".', 'The', 'coin', 'had', 'dropped', '10', 'percent', 'sharply', 'after', 'the', 'tweet.']
import string

# Characters to delete from every token: all ASCII punctuation (including the
# straight quotes " and ') plus the typographic quotes U+201C/U+201D and
# U+2018/U+2019 that often appear in copied article text.
punc = string.punctuation + "\u201c\u201d\u2018\u2019"

# A translation table with only a "delete" set removes those characters in a
# single pass per word.
table = str.maketrans("", "", punc)

# One output entry per input word, so positions still line up with `words`;
# a token made only of punctuation (e.g. "--") becomes an empty string.
stripped = [w.translate(table) for w in words]
print(stripped)
['Once', 'again', 'Elon', "Musk's", 'tweet', 'sent', 'Dogecoin', 'soaring', 'to', 'the', 'moon', 'as', 'the', 'meme', 'currency', 'surged', '17', 'percent', 'on', 'Friday', 'erasing', 'losses', 'of', 'the', 'last', 'seven', 'sessions', 'Musk', 'took', 'to', 'Twitter', 'to', 'announce', 'that', 'he', 'is', 'working', 'with', 'Doge', 'developers', 'to', 'improve', 'the', 'system', 'transaction', 'efficiency', '"Working', 'with', 'Doge', 'devs', 'to', 'improve', 'system', 'transaction', 'efficiency', 'Potentially', 'promising"', 'he', 'said', 'At', 'the', 'time', 'of', 'writing', 'Dogecoin', 'was', 'trading', 'nearly', '1716', 'percent', 'higher', 'at', '052', 'according', 'to', 'data', 'from', 'CoinDesk', 'However', 'it', 'remained', 'unclear', 'whether', 'Musk', 'was', 'referring', 'to', "Dogecoin's", 'energy', 'efficiency', '', 'an', 'issue', 'that', 'made', 'Tesla', 'stop', 'accepting', 'payments', 'in', 'Bitcoin', 'Bitcoin', 'has', 'been', 'on', 'a', 'downward', 'trajectory', 'since', 'then', 'Bitcoin', 'is', 'heading', 'to', 'its', 'worst', 'week', 'since', 'Friday', 'At', 'the', 'time', 'of', 'writing', 'it', 'was', 'trading', 'at', '48983', 'nearly', '4', 'percent', 'down', 'in', 'the', 'last', '24', 'hours', "Musk's", 'tweets', 'have', 'a', 'tendency', 'to', 'send', 'the', 'markets', 'surging', 'or', 'plummeting', 'more', 'so', 'when', 'the', 'tweets', 'concern', 'cryptocurrency', 'Earlier', 'when', 'Musk', 'announced', 'Tesla', 'accepting', 'payments', 'in', 'bitcoin', 'the', 'coin', 'crossed', 'the', '60000', 'levels', 'Now', 'that', 'he', 'has', 'withdrawn', 'the', 'decision', 'the', 'coin', 'plunged', 'nearly', '17', 'percent', 'Wednesday', 'While', 'the', 'coin', 'had', 'since', 'overcome', 'the', 'fall', 'and', 'rebounded', 'on', 'Thursday', 'and', 'was', 'trading', 'well', 'above', '50000level', 'its', 'downward', 'journey', 'began', 'again', 'after', 'another', 'shock', 'The', 'reports', 'on', 'a', 'regulatory', 'probe', 'into', 'crypto', 'exchange', 
'Binance', 'sent', 'Bitcoin', 'spiralling', 'again', 'on', 'Friday', 'Under', 'the', 'added', 'pressure', 'Bitcoin', 'lost', '4', 'percent', 'and', 'it', 'is', 'currently', 'trading', 'below', 'its', 'psychological', 'support', 'line', 'of', '50000', 'Blockchain', 'data', 'provider', 'Glassnode', 'on', 'Monday', 'said', 'there', 'are', 'indications', 'that', 'a', 'portion', 'of', "bitcoin's", 'capital', 'is', 'rotating', 'towards', 'ethereum', 'and', 'dogecoin', 'In', 'the', 'past', 'seven', 'days', 'while', 'Bitcoin', 'has', 'lost', '1211', 'percent', 'according', 'to', 'data', 'from', 'CoinMarketCap', 'Ether', 'has', 'gained', 'nearly', '11', 'percent', 'during', 'the', 'same', 'period', 'though', 'it', 'was', 'trading', 'in', 'red', 'on', 'Friday', 'At', 'the', 'time', 'of', 'writing', 'the', 'coin', 'was', 'down', 'nearly', '5', 'percent', 'at', '3798', 'Ethereum', 'has', 'soared', 'this', 'year', 'majorly', 'ignited', 'by', 'the', 'boom', 'in', 'DeFi', '', 'platforms', 'that', 'facilitate', 'cryptodenominated', 'lending', 'outside', 'traditional', 'banking', 'Dogecoin', 'too', 'has', 'suffered', 'blows', 'from', 'Elon', 'Musk', 'when', 'he', 'referred', 'to', 'the', 'coin', 'as', 'a', '"hustle"', 'The', 'coin', 'had', 'dropped', '10', 'percent', 'sharply', 'after', 'the', 'tweet']
import re

# Collect every maximal run of word characters (letters, digits, underscore).
# Matching the words directly, instead of splitting on r'\W+', yields the same
# tokens but never produces an empty string when the text starts or ends with
# punctuation or whitespace.
words = re.findall(r'\w+', text)
print(words)
['Once', 'again', 'Elon', 'Musk', 's', 'tweet', 'sent', 'Dogecoin', 'soaring', 'to', 'the', 'moon', 'as', 'the', 'meme', 'currency', 'surged', '17', 'percent', 'on', 'Friday', 'erasing', 'losses', 'of', 'the', 'last', 'seven', 'sessions', 'Musk', 'took', 'to', 'Twitter', 'to', 'announce', 'that', 'he', 'is', 'working', 'with', 'Doge', 'developers', 'to', 'improve', 'the', 'system', 'transaction', 'efficiency', 'Working', 'with', 'Doge', 'devs', 'to', 'improve', 'system', 'transaction', 'efficiency', 'Potentially', 'promising', 'he', 'said', 'At', 'the', 'time', 'of', 'writing', 'Dogecoin', 'was', 'trading', 'nearly', '17', '16', 'percent', 'higher', 'at', '0', '52', 'according', 'to', 'data', 'from', 'CoinDesk', 'However', 'it', 'remained', 'unclear', 'whether', 'Musk', 'was', 'referring', 'to', 'Dogecoin', 's', 'energy', 'efficiency', 'an', 'issue', 'that', 'made', 'Tesla', 'stop', 'accepting', 'payments', 'in', 'Bitcoin', 'Bitcoin', 'has', 'been', 'on', 'a', 'downward', 'trajectory', 'since', 'then', 'Bitcoin', 'is', 'heading', 'to', 'its', 'worst', 'week', 'since', 'Friday', 'At', 'the', 'time', 'of', 'writing', 'it', 'was', 'trading', 'at', '48', '983', 'nearly', '4', 'percent', 'down', 'in', 'the', 'last', '24', 'hours', 'Musk', 's', 'tweets', 'have', 'a', 'tendency', 'to', 'send', 'the', 'markets', 'surging', 'or', 'plummeting', 'more', 'so', 'when', 'the', 'tweets', 'concern', 'cryptocurrency', 'Earlier', 'when', 'Musk', 'announced', 'Tesla', 'accepting', 'payments', 'in', 'bitcoin', 'the', 'coin', 'crossed', 'the', '60', '000', 'levels', 'Now', 'that', 'he', 'has', 'withdrawn', 'the', 'decision', 'the', 'coin', 'plunged', 'nearly', '17', 'percent', 'Wednesday', 'While', 'the', 'coin', 'had', 'since', 'overcome', 'the', 'fall', 'and', 'rebounded', 'on', 'Thursday', 'and', 'was', 'trading', 'well', 'above', '50', '000', 'level', 'its', 'downward', 'journey', 'began', 'again', 'after', 'another', 'shock', 'The', 'reports', 'on', 'a', 'regulatory', 'probe', 
'into', 'crypto', 'exchange', 'Binance', 'sent', 'Bitcoin', 'spiralling', 'again', 'on', 'Friday', 'Under', 'the', 'added', 'pressure', 'Bitcoin', 'lost', '4', 'percent', 'and', 'it', 'is', 'currently', 'trading', 'below', 'its', 'psychological', 'support', 'line', 'of', '50', '000', 'Blockchain', 'data', 'provider', 'Glassnode', 'on', 'Monday', 'said', 'there', 'are', 'indications', 'that', 'a', 'portion', 'of', 'bitcoin', 's', 'capital', 'is', 'rotating', 'towards', 'ethereum', 'and', 'dogecoin', 'In', 'the', 'past', 'seven', 'days', 'while', 'Bitcoin', 'has', 'lost', '12', '11', 'percent', 'according', 'to', 'data', 'from', 'CoinMarketCap', 'Ether', 'has', 'gained', 'nearly', '11', 'percent', 'during', 'the', 'same', 'period', 'though', 'it', 'was', 'trading', 'in', 'red', 'on', 'Friday', 'At', 'the', 'time', 'of', 'writing', 'the', 'coin', 'was', 'down', 'nearly', '5', 'percent', 'at', '3', '798', 'Ethereum', 'has', 'soared', 'this', 'year', 'majorly', 'ignited', 'by', 'the', 'boom', 'in', 'DeFi', 'platforms', 'that', 'facilitate', 'crypto', 'denominated', 'lending', 'outside', 'traditional', 'banking', 'Dogecoin', 'too', 'has', 'suffered', 'blows', 'from', 'Elon', 'Musk', 'when', 'he', 'referred', 'to', 'the', 'coin', 'as', 'a', 'hustle', 'The', 'coin', 'had', 'dropped', '10', 'percent', 'sharply', 'after', 'the', 'tweet', '']
# Fold every token to lower case so later comparisons (e.g. against the
# lower-case stop-word list) are case-insensitive.
words = list(map(str.lower, words))
print(words)
['once', 'again', 'elon', 'musk', 's', 'tweet', 'sent', 'dogecoin', 'soaring', 'to', 'the', 'moon', 'as', 'the', 'meme', 'currency', 'surged', '17', 'percent', 'on', 'friday', 'erasing', 'losses', 'of', 'the', 'last', 'seven', 'sessions', 'musk', 'took', 'to', 'twitter', 'to', 'announce', 'that', 'he', 'is', 'working', 'with', 'doge', 'developers', 'to', 'improve', 'the', 'system', 'transaction', 'efficiency', 'working', 'with', 'doge', 'devs', 'to', 'improve', 'system', 'transaction', 'efficiency', 'potentially', 'promising', 'he', 'said', 'at', 'the', 'time', 'of', 'writing', 'dogecoin', 'was', 'trading', 'nearly', '17', '16', 'percent', 'higher', 'at', '0', '52', 'according', 'to', 'data', 'from', 'coindesk', 'however', 'it', 'remained', 'unclear', 'whether', 'musk', 'was', 'referring', 'to', 'dogecoin', 's', 'energy', 'efficiency', 'an', 'issue', 'that', 'made', 'tesla', 'stop', 'accepting', 'payments', 'in', 'bitcoin', 'bitcoin', 'has', 'been', 'on', 'a', 'downward', 'trajectory', 'since', 'then', 'bitcoin', 'is', 'heading', 'to', 'its', 'worst', 'week', 'since', 'friday', 'at', 'the', 'time', 'of', 'writing', 'it', 'was', 'trading', 'at', '48', '983', 'nearly', '4', 'percent', 'down', 'in', 'the', 'last', '24', 'hours', 'musk', 's', 'tweets', 'have', 'a', 'tendency', 'to', 'send', 'the', 'markets', 'surging', 'or', 'plummeting', 'more', 'so', 'when', 'the', 'tweets', 'concern', 'cryptocurrency', 'earlier', 'when', 'musk', 'announced', 'tesla', 'accepting', 'payments', 'in', 'bitcoin', 'the', 'coin', 'crossed', 'the', '60', '000', 'levels', 'now', 'that', 'he', 'has', 'withdrawn', 'the', 'decision', 'the', 'coin', 'plunged', 'nearly', '17', 'percent', 'wednesday', 'while', 'the', 'coin', 'had', 'since', 'overcome', 'the', 'fall', 'and', 'rebounded', 'on', 'thursday', 'and', 'was', 'trading', 'well', 'above', '50', '000', 'level', 'its', 'downward', 'journey', 'began', 'again', 'after', 'another', 'shock', 'the', 'reports', 'on', 'a', 'regulatory', 'probe', 
'into', 'crypto', 'exchange', 'binance', 'sent', 'bitcoin', 'spiralling', 'again', 'on', 'friday', 'under', 'the', 'added', 'pressure', 'bitcoin', 'lost', '4', 'percent', 'and', 'it', 'is', 'currently', 'trading', 'below', 'its', 'psychological', 'support', 'line', 'of', '50', '000', 'blockchain', 'data', 'provider', 'glassnode', 'on', 'monday', 'said', 'there', 'are', 'indications', 'that', 'a', 'portion', 'of', 'bitcoin', 's', 'capital', 'is', 'rotating', 'towards', 'ethereum', 'and', 'dogecoin', 'in', 'the', 'past', 'seven', 'days', 'while', 'bitcoin', 'has', 'lost', '12', '11', 'percent', 'according', 'to', 'data', 'from', 'coinmarketcap', 'ether', 'has', 'gained', 'nearly', '11', 'percent', 'during', 'the', 'same', 'period', 'though', 'it', 'was', 'trading', 'in', 'red', 'on', 'friday', 'at', 'the', 'time', 'of', 'writing', 'the', 'coin', 'was', 'down', 'nearly', '5', 'percent', 'at', '3', '798', 'ethereum', 'has', 'soared', 'this', 'year', 'majorly', 'ignited', 'by', 'the', 'boom', 'in', 'defi', 'platforms', 'that', 'facilitate', 'crypto', 'denominated', 'lending', 'outside', 'traditional', 'banking', 'dogecoin', 'too', 'has', 'suffered', 'blows', 'from', 'elon', 'musk', 'when', 'he', 'referred', 'to', 'the', 'coin', 'as', 'a', 'hustle', 'the', 'coin', 'had', 'dropped', '10', 'percent', 'sharply', 'after', 'the', 'tweet', '']
import nltk
# The corpus reader has to be imported explicitly; without it the
# `stopwords.words(...)` call below raises NameError on a fresh run.
from nltk.corpus import stopwords

# Fetch the stop-word corpus (skipped by NLTK when it is already up to date).
nltk.download('stopwords')

# English stop words as a list of strings.
stop_words = stopwords.words('english')
print(stop_words)
['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't"]
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data] Package stopwords is already up-to-date!
# Build the lookup set once: membership tests against a set are O(1), whereas
# scanning the stop-word list would cost O(len(stop_words)) per token.
stop_word_set = set(stop_words)

# Keep only the tokens that are not stop words, preserving their order.
doge_stop = [w for w in words if w not in stop_word_set]
print(doge_stop)
['elon', 'musk', 'tweet', 'sent', 'dogecoin', 'soaring', 'moon', 'meme', 'currency', 'surged', '17', 'percent', 'friday', 'erasing', 'losses', 'last', 'seven', 'sessions', 'musk', 'took', 'twitter', 'announce', 'working', 'doge', 'developers', 'improve', 'system', 'transaction', 'efficiency', 'working', 'doge', 'devs', 'improve', 'system', 'transaction', 'efficiency', 'potentially', 'promising', 'said', 'time', 'writing', 'dogecoin', 'trading', 'nearly', '17', '16', 'percent', 'higher', '0', '52', 'according', 'data', 'coindesk', 'however', 'remained', 'unclear', 'whether', 'musk', 'referring', 'dogecoin', 'energy', 'efficiency', 'issue', 'made', 'tesla', 'stop', 'accepting', 'payments', 'bitcoin', 'bitcoin', 'downward', 'trajectory', 'since', 'bitcoin', 'heading', 'worst', 'week', 'since', 'friday', 'time', 'writing', 'trading', '48', '983', 'nearly', '4', 'percent', 'last', '24', 'hours', 'musk', 'tweets', 'tendency', 'send', 'markets', 'surging', 'plummeting', 'tweets', 'concern', 'cryptocurrency', 'earlier', 'musk', 'announced', 'tesla', 'accepting', 'payments', 'bitcoin', 'coin', 'crossed', '60', '000', 'levels', 'withdrawn', 'decision', 'coin', 'plunged', 'nearly', '17', 'percent', 'wednesday', 'coin', 'since', 'overcome', 'fall', 'rebounded', 'thursday', 'trading', 'well', '50', '000', 'level', 'downward', 'journey', 'began', 'another', 'shock', 'reports', 'regulatory', 'probe', 'crypto', 'exchange', 'binance', 'sent', 'bitcoin', 'spiralling', 'friday', 'added', 'pressure', 'bitcoin', 'lost', '4', 'percent', 'currently', 'trading', 'psychological', 'support', 'line', '50', '000', 'blockchain', 'data', 'provider', 'glassnode', 'monday', 'said', 'indications', 'portion', 'bitcoin', 'capital', 'rotating', 'towards', 'ethereum', 'dogecoin', 'past', 'seven', 'days', 'bitcoin', 'lost', '12', '11', 'percent', 'according', 'data', 'coinmarketcap', 'ether', 'gained', 'nearly', '11', 'percent', 'period', 'though', 'trading', 'red', 'friday', 'time', 'writing', 'coin', 
'nearly', '5', 'percent', '3', '798', 'ethereum', 'soared', 'year', 'majorly', 'ignited', 'boom', 'defi', 'platforms', 'facilitate', 'crypto', 'denominated', 'lending', 'outside', 'traditional', 'banking', 'dogecoin', 'suffered', 'blows', 'elon', 'musk', 'referred', 'coin', 'hustle', 'coin', 'dropped', '10', 'percent', 'sharply', 'tweet', '']