# Address of the store's home page, the starting point of the scrape.
home_url = 'https://www.metro.pe/'

# XPath expressions for the "best elections" products.
# links_b_e is evaluated against the home page; the remaining expressions
# are evaluated against each individual product page.
links_b_e = (
    '//div[starts-with(@class, "product-item")]'
    '/div[@class = "product-item__info"]'
    '/a/@href'
)
name_product_b_e = (
    '//div[@class = "product-content"]'
    '/div[@class="inner product"]'
    '/div[@class = "name-mobile mobile"]'
    '/div[starts-with(@class, "fn productName")]'
    '/text()'
)
best_price_b_e = (
    '//div[@class = "product-content"]'
    '//div[@class = "product-info"]'
    '//strong[@class = "skuBestPrice"]'
    '/text()'
)
normal_price_b_e = (
    '//div[@class = "product-content"]'
    '//div[@class = "product-info"]'
    '//strong[@class = "skuListPrice"]'
    '/text()'
)
image_b_e = (
    '//div[@class = "product-content"]'
    '//div[@class = "product-info"]'
    '//div[@id = "include"]'
    '//div[@id = "image"]'
    '/a/@href'
)
import requests
import pandas as pd
import lxml.html as html
import datetime
# Module-level accumulators, one per scraped field. Each starts as its own
# empty list and is filled by get_product_information().
(
    arr_name_p_b_e,
    arr_best_price_b_e,
    arr_regular_price_b_e,
    arr_image_b_e,
) = ([] for _ in range(4))
def get_product_information(link):
    """Fetch one product page and record its name, prices and image link.

    The page at ``link`` is downloaded and queried with the module-level
    XPath expressions. When every field is found, the raw XPath result
    lists are appended to the four module-level ``arr_*_b_e`` lists, so the
    lists stay the same length and every stored entry has a first element.
    A failed download or an incomplete page is reported with ``print`` and
    the product is skipped.

    Args:
        link: URL of the product page to scrape.

    Returns:
        None. Results are stored in the module-level lists.
    """
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(link, timeout=30)
        if response.status_code != 200:
            raise ValueError(f'Error: {response.status_code}')
        # Decode explicitly as UTF-8 so accented characters survive.
        product = response.content.decode('utf-8')
        parsed = html.fromstring(product)
    except (ValueError, requests.RequestException) as error:
        print(error)
        return

    title = parsed.xpath(name_product_b_e)
    best_price = parsed.xpath(best_price_b_e)
    regular_price = parsed.xpath(normal_price_b_e)
    image = parsed.xpath(image_b_e)

    # xpath() returns an empty list when nothing matches (it never raises
    # IndexError), so incomplete products have to be detected explicitly.
    if not (title and best_price and regular_price and image):
        print(f'Skipped, missing product data: {link}')
        return

    arr_name_p_b_e.append(title)
    arr_best_price_b_e.append(best_price)
    arr_regular_price_b_e.append(regular_price)
    arr_image_b_e.append(image)
def home():
    """Scrape the home page and process every product linked from it.

    Downloads ``home_url``, extracts the product links selected by
    ``links_b_e``, prints how many were found and passes each link to
    ``get_product_information``. A non-200 response, a decoding problem or
    a network failure is reported with ``print`` and ends the function
    without processing any product.

    Returns:
        None.
    """
    try:
        # A timeout keeps a stalled connection from hanging the run forever.
        response = requests.get(home_url, timeout=30)
        if response.status_code != 200:
            raise ValueError(f'Error: {response.status_code}')
        # Decode as UTF-8 so characters such as "ñ" are recognized.
        page = response.content.decode('utf-8')
        # Build an element tree so the page can be queried with XPath.
        parsed = html.fromstring(page)
    except (ValueError, requests.RequestException) as error:
        print(error)
        return

    # Links to each product of the "best elections" section.
    links_products_b_e = parsed.xpath(links_b_e)
    print(f'Length Link Best elections Product: {len(links_products_b_e)}')
    for link_b_e in links_products_b_e:
        get_product_information(link_b_e)
# Run the scraper: home() visits each product link and, through
# get_product_information(), fills the module-level arr_*_b_e lists.
home()
Length Link Best elections Product: 61
# Sanity check: report how many entries each list collected, one per line.
print(
    f'Lenght Names: {len(arr_name_p_b_e)}',
    f'Lenght Best Price: {len(arr_best_price_b_e)}',
    f'Lenght Regular Price: {len(arr_regular_price_b_e)}',
    f'Lenght Image: {len(arr_image_b_e)}',
    sep='\n',
)
Lenght Names: 61
Lenght Best Price: 61
Lenght Regular Price: 61
Lenght Image: 61
def _first_or_none(values):
    """Return the first element of ``values``, or ``None`` when it is empty."""
    return values[0] if values else None


# Each stored entry is the list returned by one XPath query; keep only its
# first match. An empty result becomes None instead of raising IndexError,
# so the four lists stay aligned row by row.
arr_name_p_b_e = [_first_or_none(found) for found in arr_name_p_b_e]
arr_best_price_b_e = [_first_or_none(found) for found in arr_best_price_b_e]
arr_regular_price_b_e = [_first_or_none(found) for found in arr_regular_price_b_e]
arr_image_b_e = [_first_or_none(found) for found in arr_image_b_e]
# Build the table by pairing each column name with its list of values;
# column order follows the order of the names below.
df_products_b_e = pd.DataFrame(
    data=dict(
        zip(
            ('name', 'best_price', 'regular_price', 'image_url'),
            (
                arr_name_p_b_e,
                arr_best_price_b_e,
                arr_regular_price_b_e,
                arr_image_b_e,
            ),
        )
    )
)
# Bare expression: displays the table when run as a notebook cell.
df_products_b_e
nameobject
Nex Hervidor Eléctrico KP2720 1.7 Lt3.3%
Cuisinart Batidora de Inmersión RHB1001.6%
58 others95.1%
best_priceobject
S/. 39.906.6%
S/. 40.004.9%
45 others88.5%
0
Cuisinart Batidora de Inmersión RHB100
S/. 499.00
1
Electrolux Limpiador a Vapor Multiusos Mop 11 1300W
S/. 399.00
2
Taurus Licuadora 1.5 Lt TJBH 500W
S/. 159.90
3
Philips Plancha Seca GC185-89
S/. 129.00
4
Imaco Licuadora 1.5 Lt BLS508 5 Velocidades
S/. 179.00
5
Imaco Sandwich Maker Negro IST101N
S/. 89.00
6
Imaco Sandwichera IST460P
S/. 69.00
7
Taurus Cafetera de Filtro Florencia 12 Tazas
S/. 199.90
8
Cuisinart Hervidor Eléctrico Perfect CPK18 1.7 Lt
S/. 699.00
9
Oster Hervidor Eléctrico BVSTKT673CR-053 1.7 Lt
S/. 159.00
# Today's date as day-month-year; it becomes the name of the exported file.
today = f'{datetime.date.today():%d-%m-%Y}'
# Bare expression: shows the date string when run as a notebook cell.
today
# Write the table without the index column to "<date>.csv".
df_products_b_e.to_csv(f'{today}.csv', index=False)
print('File saved')
File saved