# Entry point of the scrape: the store's home page.
home_url = 'https://www.metro.pe/'

# XPath expressions for the "better elections" products (the ``_b_e`` suffix).
# Each long expression is written one location step per line; adjacent string
# literals are concatenated by Python, so the resulting value is a single path.

# Links to the individual product pages, read from the home page.
links_b_e = (
    '//div[starts-with(@class, "product-item")]'
    '/div[@class = "product-item__info"]'
    '/a/@href'
)

# Product name text on a product page.
name_product_b_e = (
    '//div[@class = "product-content"]'
    '/div[@class="inner product"]'
    '/div[@class = "name-mobile mobile"]'
    '/div[starts-with(@class, "fn productName")]'
    '/text()'
)

# Text of the "skuBestPrice" element on a product page.
best_price_b_e = (
    '//div[@class = "product-content"]'
    '//div[@class = "product-info"]'
    '//strong[@class = "skuBestPrice"]'
    '/text()'
)

# Text of the "skuListPrice" element on a product page.
normal_price_b_e = (
    '//div[@class = "product-content"]'
    '//div[@class = "product-info"]'
    '//strong[@class = "skuListPrice"]'
    '/text()'
)

# href of the product image link on a product page.
image_b_e = (
    '//div[@class = "product-content"]'
    '//div[@class = "product-info"]'
    '//div[@id = "include"]'
    '//div[@id = "image"]'
    '/a/@href'
)
import requests
import pandas as pd
import lxml.html as html
import datetime
# Result accumulators, one entry per scraped product. get_product_information()
# appends to all four together, so index i refers to the same product in each.
arr_name_p_b_e, arr_best_price_b_e = [], []
arr_regular_price_b_e, arr_image_b_e = [], []
def get_product_information(link):
    """Download one product page and record its name, prices and image link.

    The page is queried with the module-level XPath expressions
    ``name_product_b_e``, ``best_price_b_e``, ``normal_price_b_e`` and
    ``image_b_e``. When every query finds at least one match, the four raw
    result lists are appended to ``arr_name_p_b_e``, ``arr_best_price_b_e``,
    ``arr_regular_price_b_e`` and ``arr_image_b_e`` respectively, so the
    accumulators always stay the same length.

    Problems are reported with ``print`` instead of being raised: a failed
    request, a non-200 status, or a body that cannot be decoded/parsed all
    leave the accumulators untouched. A product missing any of the four fields
    is skipped silently.

    Args:
        link: URL of the product page to fetch.

    Returns:
        None. Results are stored in the module-level lists.
    """
    try:
        # Without a timeout a single unresponsive page would hang the run.
        response = requests.get(link, timeout=30)
    except (requests.RequestException, ValueError) as err:
        # Connection failures, timeouts and malformed URLs: report and move on
        # so one bad link does not abort the remaining products.
        print(err)
        return

    if response.status_code != 200:
        print(f'Error: {response.status_code}')
        return

    try:
        parsed = html.fromstring(response.content.decode('utf-8'))
    except ValueError as ve:
        # UnicodeDecodeError is a ValueError subclass, so undecodable bodies
        # end up here as well.
        print(ve)
        return

    title = parsed.xpath(name_product_b_e)
    best_price = parsed.xpath(best_price_b_e)
    regular_price = parsed.xpath(normal_price_b_e)
    image = parsed.xpath(image_b_e)

    # xpath() returns an empty list when nothing matches. Callers take the
    # first element of every stored list, so an incomplete product is dropped
    # here rather than stored in a form that would fail on indexing later.
    if not (title and best_price and regular_price and image):
        return

    arr_name_p_b_e.append(title)
    arr_best_price_b_e.append(best_price)
    arr_regular_price_b_e.append(regular_price)
    arr_image_b_e.append(image)
def home():
    """Fetch the home page and scrape every product it links to.

    Downloads ``home_url``, extracts the product links selected by the
    ``links_b_e`` XPath expression, prints how many were found, and calls
    ``get_product_information`` once per link in document order.

    Problems are reported with ``print`` instead of being raised: a failed
    request, a non-200 status, or a body that cannot be decoded/parsed all end
    the function early without visiting any product.

    Returns:
        None.
    """
    try:
        # Without a timeout an unresponsive server would hang the script.
        response = requests.get(home_url, timeout=30)
    except (requests.RequestException, ValueError) as err:
        print(err)
        return

    if response.status_code != 200:
        print(f'Error: {response.status_code}')
        return

    try:
        # Decode explicitly as UTF-8 so characters such as "ñ" are preserved,
        # then build an element tree that can be queried with XPath.
        parsed = html.fromstring(response.content.decode('utf-8'))
    except ValueError as ve:
        # UnicodeDecodeError is a ValueError subclass, so undecodable bodies
        # end up here as well.
        print(ve)
        return

    links_products_b_e = parsed.xpath(links_b_e)
    print(f'Length Link Best elections Product: {len(links_products_b_e)}')
    for link_b_e in links_products_b_e:
        get_product_information(link_b_e)
# Run the scrape; this fills the four module-level result lists.
home()
print(f'Lenght Names: {len(arr_name_p_b_e)}')
print(f'Lenght Best Price: {len(arr_best_price_b_e)}')
print(f'Lenght Regular Price: {len(arr_regular_price_b_e)}')
print(f'Lenght Image: {len(arr_image_b_e)}')

# Every stored entry is the list returned by one XPath query; keep only its
# first match. An empty result becomes None instead of raising IndexError, so
# a single product with a missing field cannot abort the export.
arr_name_p_b_e = [i[0] if i else None for i in arr_name_p_b_e]
arr_best_price_b_e = [i[0] if i else None for i in arr_best_price_b_e]
arr_regular_price_b_e = [i[0] if i else None for i in arr_regular_price_b_e]
arr_image_b_e = [i[0] if i else None for i in arr_image_b_e]

# One row per product, one column per scraped field.
df_products_b_e = pd.DataFrame({
    'name': arr_name_p_b_e,
    'best_price': arr_best_price_b_e,
    'regular_price': arr_regular_price_b_e,
    'image_url': arr_image_b_e,
})

# The output file is named after today's date in day-month-year order.
today = datetime.date.today().strftime('%d-%m-%Y')
df_products_b_e.to_csv(today + '.csv', index=False)
print('File saved')