!pip install nltk
import nltk
nltk.download('wordnet')
from nltk.corpus import wordnet as wn

# All WordNet synsets for the word "language"
wn.synsets("language")

# Print each synset's part of speech and its lemma names
poses = {'n': 'noun', 'v': 'verb', 's': 'adj (s)', 'a': 'adj', 'r': 'adv'}
for synset in wn.synsets("language"):
    print('{}: {}'.format(
        poses[synset.pos()],
        ", ".join([l.name() for l in synset.lemmas()])
    ))
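
# A hedged extra sketch (not in the original cells): WordNet also encodes
# hypernym ("is-a") relations, which shows how the resource organizes word
# senses into a taxonomy. This walks up the hypernym chain of the first
# "language" synset; the variable names are illustrative only.
lang = wn.synsets("language")[0]
hyper = lambda s: s.hypernyms()
list(lang.closure(hyper))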
!pip install gensim
import gensim.downloader as api
from gensim.models.word2vec import Word2Vec

# Download the text8 corpus and train a word2vec model on it
corpus = api.load('text8')
print(corpus)
model = Word2Vec(corpus)
word_vectors = model.wv  # keep the trained KeyedVectors for lookups

# Inspect the learned vector for "medium"
print('word [medium] vector =\n', word_vectors['medium'])
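
# A small hedged addition: pairwise cosine similarities are easier to read
# than raw vector components. The word pairs below are arbitrary
# illustrations, not part of the original notebook.
print(word_vectors.similarity('soccer', 'football'))
print(word_vectors.similarity('soccer', 'physics'))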
# Nearest neighbours of "soccer" in the embedding space
word_vectors.most_similar('soccer')
# Which word does not belong with the others?
word_vectors.doesnt_match("soccer tennis basketball bottle".split())
# The classic analogy: king - man + woman ≈ ?
result = word_vectors.most_similar(positive=['woman', 'king'], negative=['man'])
print("{}: {:.4f}".format(*result[0]))
# Generalize the pattern above: x1 is to x2 as y1 is to ?
def analogy(x1, x2, y1):
    result = word_vectors.most_similar(positive=[y1, x2], negative=[x1])
    return result[0][0]

analogy('brazil', 'brazilian', 'america')
analogy('europe', 'european', 'spain')
# Collect the vocabulary and stack all vectors into a matrix
# (gensim 4.x exposes the KeyedVectors vocabulary as index_to_key)
vocab = list(word_vectors.index_to_key)
X = word_vectors[vocab]
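
# The matrix X is presumably built for visualization; as a hedged sketch of
# that next step (an assumption, not part of the original code), project a
# handful of words to 2-D with PCA and scatter-plot them. The word list is
# arbitrary, and sklearn/matplotlib are assumed to be available.
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

sample_words = ['king', 'queen', 'man', 'woman', 'soccer', 'tennis',
                'france', 'spain', 'physics', 'chemistry']
vectors = word_vectors[sample_words]           # shape: (len(sample_words), vector_size)
coords = PCA(n_components=2).fit_transform(vectors)

plt.figure(figsize=(6, 6))
plt.scatter(coords[:, 0], coords[:, 1])
for word, (x, y) in zip(sample_words, coords):
    plt.annotate(word, (x, y))
plt.title('PCA projection of selected word vectors')
plt.show()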