import pandas as pd
!pip install spacy
!python -m spacy download fr_core_news_md
import spacy
# Load the spaCy pipeline downloaded above once; the rest of the file uses it
# through the global name `nlp`.
nlp = spacy.load('fr_core_news_md')
# Quick check of the pipeline: print every token of a sample sentence next to
# the lemma spaCy assigns to it.
doc = nlp("voudrais non animaux yeux dors couvre.")
for token in doc:
    print(token, token.lemma_)
voudrais vouloir
non non
animaux animal
yeux oeil
dors dormir
couvre couvrir
. .
def lemma_function(text: str) -> list:
    """Return the lemma of every token found in ``text``.

    The text is lower-cased and then parsed with the module-level spaCy
    pipeline ``nlp``.

    Args:
        text: The string to lemmatise. It must be a single string, because
            ``.lower()`` is called on it.

    Returns:
        A list of lemma strings, one per token and in token order.
        Punctuation tokens are included (a final "." yields a "." lemma).
    """
    return [token.lemma_ for token in nlp(text.lower())]
# Try the helper on the same sample sentence as the token/lemma loop above.
lemma_function("voudrais non animaux yeux dors couvre.")
'''
from spacy.lang.fr.stop_words import STOP_WORDS as fr_stop
stopwords_fr = list(fr_stop)
stopwords_fr
'''
# def stop_word (texte_lemmatized):
# liste_sans_stopw = []
# for word in texte_lemmatized:
# if word in stopwords_fr:
# pass
# else:
# liste_sans_stopw.append(word)
# print(list_text)
# Keywords (lower-case, without accents) that find_recette looks for when
# flagging the 'noel' recipes further down.
liste_mots_cles = [
    "noel",
    "hiver",
    "fin annee",
    "festif",
    "familial",
    "homard",
    "saumon",
    "saint jacque",
    "langouste",
    "huitre",
    "dinde",
    "foie gras",
    "verrine",
    "pain epice",
    "buche",
    "exotique",
]
# lemma_function expects one string (it calls .lower() on its argument), so
# passing the whole list raises AttributeError. Lemmatise each keyword on its
# own and join its lemmas back together, so that this list stays parallel to
# liste_mots_cles (one entry per keyword, multi-word keywords kept whole).
liste_mots_cles_lemma = [" ".join(lemma_function(mot)) for mot in liste_mots_cles]
liste_mots_cles_lemma
# Read the CSV export into a DataFrame; the location is a hard-coded absolute
# path. The bare `df` only shows the frame when run as a notebook cell.
path = '/work/export1.csv'
df = pd.read_csv(path)
df
# Lemmatise every dish name: each 'Plat' cell becomes the list of lemmas of
# its text.
# NOTE(review): 'Plat' is passed through lemma_function again further down the
# file; run only one of the two passes, otherwise the column is processed twice.
lemma_lambda = lambda x: lemma_function(x)
df['Plat'] = df['Plat'].apply(lemma_lambda)
df
# Exploratory checks for matching dishes against the keyword list.
# Series.isin compares whole cells, so this is True only where the entire
# 'Plat' value is itself one of the keywords.
df['Plat'].isin(liste_mots_cles)
# Toy example: the words shared by a two-word sample and the keyword list.
list1 = ['punch', 'exotique']
list2 = liste_mots_cles
list(set(list1).intersection(list2))
# NOTE(review): this assigns one short list as the whole column. pandas needs
# the value's length to match the number of rows, so this presumably fails on
# the real data; a per-row intersection was probably intended - confirm.
df['liste_com'] = list(set(list1).intersection(list2))
df['Plat'].isin(liste_mots_cles)
df
# Convert the 'Plat' column to strings, then print the frame's column summary.
df['Plat'] = df['Plat'].astype(str)
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Plat 15 non-null object
1 Note 15 non-null float64
2 Nb_Avis 15 non-null int64
3 Type_plat 15 non-null object
4 Liste_ingredients 15 non-null object
5 Liste_ingredients_complete 15 non-null bool
6 Duree_min 14 non-null float64
7 bool_col 15 non-null bool
dtypes: bool(2), float64(2), int64(1), object(3)
memory usage: 878.0+ bytes
# Exploratory: index the frame by 'Plat', look up one row per keyword and drop
# the rows containing missing values. The result is not assigned, so df itself
# is left unchanged.
df.set_index('Plat').reindex(liste_mots_cles).dropna()
df
# Rows whose whole 'Liste_ingredients' value equals one of the keywords.
df[df['Liste_ingredients'].isin(liste_mots_cles)]
def find_recette(text, mots_cles=None):
    """Tell whether ``text`` contains at least one keyword.

    Args:
        text: Either a list of words (for example the output of
            ``lemma_function``) or a plain string. A string is split on
            whitespace first, so that whole words are compared rather than
            single characters.
        mots_cles: Keywords to look for. Defaults to the module-level
            ``liste_mots_cles``.

    Returns:
        True if any word of ``text`` is one of the keywords, False otherwise.

    Matching is done word by word, so a multi-word keyword such as
    "foie gras" only matches when ``text`` is a list containing that exact
    entry.
    """
    if mots_cles is None:
        mots_cles = liste_mots_cles
    mots = text.split() if isinstance(text, str) else text
    return any(mot in mots_cles for mot in mots)
# Smoke test of find_recette on two hand-written dish names.
texte_test = 'buche de saumon en verrine'
texte_test1 = 'patate douce en sauce'
# Feed find_recette the lemmatised words rather than the raw strings: the
# lemma lists were previously computed and then thrown away, so the keyword
# check never saw whole words.
print(find_recette(lemma_function(texte_test)))
print(find_recette(lemma_function(texte_test1)))
# Replace each dish name by its list of lemmas, then flag the rows for which
# find_recette reports a keyword.
df['Plat'] = df['Plat'].apply(lemma_function)
df
df['plat_noel'] = df['Plat'].apply(find_recette)
df
# Same two steps for the ingredient lists.
# NOTE(review): 'ingedients_noel' looks like a misspelling of
# 'ingredients_noel'; the name is kept as is because other code may read it.
df['Liste_ingredients'] = df['Liste_ingredients'].apply(lemma_function)
df['ingedients_noel'] = df['Liste_ingredients'].apply(find_recette)
df