import Europart
import LKWTeile24
import Winkler
from time import perf_counter
from selenium import webdriver
import math
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
# Initializing a Chrome WebDriver.
def initialize_driver():
# Implicit wait is set to 10 seconds.
# If a website does not load until then, no results will be retrieved.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome('chromedriver', options=options)
driver.implicitly_wait(10)
return driver
def read_from_xlsx(file_directory):
# For uniformity reasons PNs are converted to string.
# This is required later to compare the search-term with the resulted PN.
return pd.read_excel(file_directory, header=None)[0].apply(str).to_list()
def scrape_all_records(driver, pn, keep):
tic = perf_counter()
records = Europart.scrape_records(driver, pn, keep), \
Winkler.scrape_records(driver, pn, keep), \
LKWTeile24.scrape_records(driver, pn, keep)
toc = perf_counter()
print(f"PN: {pn} - {toc - tic:0.2f} seconds")
print(records)
return records
# Here, all the functions and files for webscraping are put into action.
def web_scraping(file_directory, keep):
driver = initialize_driver()
login(driver)
# Lists for the results of each website + a list to record the relevant vehicle types
ep_results, wk_results, lkw_results, model_types = ([] for i in range(4))
pn_list = read_from_xlsx(file_directory)
# The scraping is handled in each corresponding python file.
for pn in pn_list:
ep_records, wk_records, (lkw_records, models) = scrape_all_records(driver, pn, keep)
if ep_records:
ep_results.extend(ep_records)
else:
ep_results.append(('Europart', pn, 'No results found', None, None, None))
if wk_records:
wk_results.extend(wk_records)
else:
wk_results.append(('Winkler', pn, 'No results found', None, None, None))
if lkw_records:
lkw_results.extend(lkw_records)
else:
lkw_results.append(('LKW-Teile 24', pn, 'No results found', None, None))
if models:
model_types.append(models)
driver.quit()
# The results are saved in separate DataFrames, as some websites provide more/less information.
ep_df = pd.DataFrame(data=ep_results, columns=['Anbieter', 'Teilenummer', 'Artikelname', 'Beschreibung', 'Marke', 'Nettopreis'])
lkw_df = pd.DataFrame(data=lkw_results, columns=['Anbieter', 'Teilenummer', 'Artikelname', 'Marke', 'Nettopreis'])
wk_df = pd.DataFrame(data=wk_results, columns=['Anbieter', 'Teilenummer', 'Artikelname', 'Marke', 'Nettopreis', 'Check'])
model_df = pd.DataFrame(data=model_types, columns=['Teilenummer', 'Passend für'])
return [ep_df, wk_df, lkw_df], model_df
# Combining all three steps of ETL into one function
def main(file_directory='Input1.xlsx', keep=0):
### EXTRACT ###
# The results are concatenated, but the DataFrame with the vehicle model types is kept separated.
record_dfs, model_df = web_scraping(file_directory, keep)
df = pd.concat(record_dfs).reset_index(drop=True)
### TRANSFORM ###
# Deduplicating entries for PNs without any results, and putting them at the end (in 2 steps)
# 1. Creating a DataFrame with all PNs for which no results have been found on any website.
empties = df.groupby('Teilenummer')['Nettopreis'].count().reset_index()
empties = empties[empties.Nettopreis == 0]
empties['Artikelname'] = 'No results found'
empties.drop(columns=['Nettopreis'], inplace=True)
df.sort_values(by=['Teilenummer', 'Anbieter'], inplace=True)
# 2. Delete all entries without 'No results found' and append them back on the bottom.
df = df[df.Artikelname != "No results found"]
df = df.append(empties, ignore_index=True)
# Left join with vehicle types and formatting.
df = df.merge(model_df, on='Teilenummer', how='left')
df['Marke'] = df.Marke.str.title()
### LOAD ###
# Save to Excel but also return DataFrame for further analysis on this notebook
df.to_excel('Output.xlsx', sheet_name='Final', index=False)
return df
# The file can be changed through the input cell further above.
file_directory = f'InputFiles/{chooseFile}'
print('PNs:\n', dict((i,j) for i,j in enumerate(read_from_xlsx(file_directory))), '\n')
df = main(file_directory)
df.head(3)
PNs:
{0: '2995046', 1: '2995138', 2: '2997760', 3: '2998646', 4: '4786023', 5: '4808509', 6: '4892318', 7: '4892320', 8: '42522680', 9: '42522942', 10: '69502571', 11: '79014078', 12: '99432760'}
PN: 2995046 - 22.18 seconds
([], [], ([('LKW-Teile 24', '2995046', 'Generator', 'DT Spare Parts', 150.46)], ('2995046', 'EuroStar 1993-2002 | EuroTech 1992-2002 | Eurocargo (tbd) | MK 1975-1992 | P 1979-1993 | PA 1979-1993 | EuroTrakker 1993-2004 | TurboStar 1984-1993 | TurboTech 1984-1993 | Zeta 1979-1992')))
PN: 2995138 - 22.16 seconds
([('Europart', '2995138', 'Starter', 'Starter NEU 24V 4.5KW 10Z. 8EA 012 586-001 CS1383 Ef.8EA738258111', 'hella', 231.55), ('Europart', '2995138', 'Starter', 'Starter 24V/4.5kW 10Z. 20540102 PRESTOLITE Bosch System', 'prestolite', 271.7), ('Europart', '2995138', 'Starter', 'Starter AT 24V/4.5KW 0 986 022 940 f.Iveco/Mitsub. BOSCH', 'bosch', 370.2)], [('Winkler', '2995138', 'Anlasser, AT, 24V, 4,5kW', 'Remy', 119.1, None), ('Winkler', '2995138', 'Anlasser, AT, 24V, 4,5kW', 'Bosch', 370.2, None), ('Winkler', '2995138', 'Anlasser, 24V, 4,5kW', 'Prestolite', 413.0, None)], ([('LKW-Teile 24', '2995138', 'Anlasser', 'DT Spare Parts', 181.29), ('LKW-Teile 24', '2995138', '+Line Anlasser für Iveco 4.5 kw F042000117', '+Line', 129.55), ('LKW-Teile 24', '2995138', 'Anlasser Spannung [V] 24 ', 'PE Automotive', 178.24), ('LKW-Teile 24', '2995138', 'Anlasser 24V / 5,0kW', 'Monark', 246.08), ('LKW-Teile 24', '2995138', 'Anlasser 24V / 4,5kW', 'Monark', 163.68)], ('2995138', 'Citelis 2005-2013 | CityClass 1996-2007 | Crossway 2006- | EuroClass 1993-2007 | EuroMover 1998-2005 | EuroRider 2001-2013 | EuroTech 1992-2002 | Evadys 2005- | Stralis (tbd) | EuroTrakker 1993-2004 | Trakker (tbd)')))
PN: 2997760 - 30.09 seconds
([('Europart', '2997760', 'Trommelbremsbelag-Satz', 'Achssatz (2+6) Beral 1642 19091 21.50 mm 2x19091/6x19098', 'beral', 204.68), ('Europart', '2997760', 'TBB-Satz (2+6) D7141 19091 21.80mm', '', 'textar', 179.41)], [], ([('LKW-Teile 24', '2997760', 'Anlasser', 'DT Spare Parts', 261.79), ('LKW-Teile 24', '2997760', '+Line Anlasser für Iveco 4.0 kw 0001123009', '+Line', 146.64), ('LKW-Teile 24', '2997760', '+Line Anlasser für Iveco 4.0 kw 0001123009', '+Line', 146.64)], ('2997760', 'Eurocargo (tbd) | M 1977-1984 | P 1979-1993 | PA 1979-1993 | Zeta 1979-1992')))
PN: 2998646 - 23.20 seconds
([('Europart', '2998646', 'Generator', 'Generator 24V/90A mRS-12N 861286 Mitshubishi Sys Prestolite', 'prestolite', 225.68), ('Europart', '2998646', 'Generator', 'Generator 24V/90A mRS-12N 860712GBA Bosch System pf.0986046040', 'prestolite', 251.72), ('Europart', '2998646', 'AUSTAUSCH GENER ATOR 2998646 Iveco', '', 'No brand specified', 605.38)], [('Winkler', '2998646', 'Lichtmaschine, 24V, 90A', 'Prestolite', 414.0, None), ('Winkler', '2998646', 'Lichtmaschine, 24V, 90A', 'Prestolite', 337.0, None)], ([('LKW-Teile 24', '2998646', 'Generator', 'DT Spare Parts', 197.78), ('LKW-Teile 24', '2998646', 'Generator', 'auger', 234.65), ('LKW-Teile 24', '2998646', '+Line Lichtmaschine für Iveco 90A 0123525502', '+Line', 156.57), ('LKW-Teile 24', '2998646', '+Line Lichtmaschine für Iveco 90A 0123525502', '+Line', 135.89)], ('2998646', 'EuroMover 1998-2005 | EuroStar 1993-2002 | EuroTech 1992-2002 | Powerstar 1999- | Stralis (tbd) | EuroTrakker 1993-2004 | Trakker (tbd)')))
PN: 4786023 - 18.31 seconds
([('Europart', '4786023', 'Starter', 'Starter 24V/5.4kW 10Z. 860684 PRESTOLITE', 'prestolite', 287.3)], [], ([], None))
PN: 4808509 - 17.21 seconds
([], [], ([], None))
PN: 4892318 - 22.37 seconds
([('Europart', '4892318', 'Generator', 'Generator 24V/110A m.RS pf. 4892320 pf. IVECO', 'europart', 189.7), ('Europart', '4892318', 'Generator', 'Generator 24V/110A mRS-8N 860810GB PRESTOLITE', 'prestolite', 199.02), ('Europart', '4892318', 'Generator', 'Generator AT 24V 70A 0 986 045 160 f.Iveco BOSCH', 'bosch', 261.6), ('Europart', '4892318', 'Generator', 'Generator Iveco 4892318', 'No brand specified', 710.28)], [('Winkler', '4892318', 'Lichtmaschine, 28V, 120A', 'Prestolite', 520.0, None)], ([('LKW-Teile 24', '4892318', 'Generator', 'DT Spare Parts', 194.48), ('LKW-Teile 24', '4892318', 'Generator', 'auger', 216.24), ('LKW-Teile 24', '4892318', '+Line Pro Lichtmaschine für Iveco 70A 0124555005', '+Line Pro', 167.75), ('LKW-Teile 24', '4892318', '+Line Lichtmaschine für Iveco 70A 0124555005', '+Line', 127.8), ('LKW-Teile 24', '4892318', 'Bosch Neu Lichtmaschine für Iveco 70A 0124555005', 'Bosch Neu', 163.75)], ('4892318', 'EuroRider 2001-2013 | Eurocargo (tbd)')))
PN: 4892320 - 22.33 seconds
([('Europart', '4892320', 'Generator', 'Generator 24V/110A m.RS pf. 4892320 pf. IVECO', 'europart', 189.7), ('Europart', '4892320', 'Generator', 'Generator 24V/110A mRS-8N 860810GB PRESTOLITE', 'prestolite', 199.02), ('Europart', '4892320', 'Generator', 'Generator AT 24V 90A 0 986 045 430 BOSCH', 'bosch', 277.2), ('Europart', '4892320', 'Generator', 'GENERATOR-GENER ATOR 4892320 Iveco', 'No brand specified', 772.26)], [('Winkler', '4892320', 'Lichtmaschine, 28V, 120A', 'Prestolite', 520.0, None)], ([('LKW-Teile 24', '4892320', 'Generator', 'DT Spare Parts', 194.48), ('LKW-Teile 24', '4892320', 'Generator', 'auger', 216.24), ('LKW-Teile 24', '4892320', 'Generator / Lichtmaschine', 'LT24®', 135.65), ('LKW-Teile 24', '4892320', '+Line Pro Lichtmaschine für Iveco 90A 0124655005', '+Line Pro', 158.0), ('LKW-Teile 24', '4892320', 'Bosch Neu Lichtmaschine für Cummins 90A 0124655020', 'Bosch Neu', 232.5)], ('4892320', 'EuroRider 2001-2013 | Eurocargo (tbd)')))
PN: 42522680 - 17.11 seconds
([('Europart', '42522680', 'Starter', 'Starter 24V/5.4kW 11Z. 860656GB PRESTOLITE', 'prestolite', 292.5)], [], ([], None))
No manufacturer table found for 42522942.
PN: 42522942 - 23.30 seconds
([('Europart', '42522942', 'Starter', 'Starter 24V/6KW 10Z. M93R3004SE PRESTOLITE pf.0001364301', 'prestolite', 206.05)], [('Winkler', '42522942', 'Anlasser, 24V, 6,0kW', 'Prestolite', 356.0, None)], ([('LKW-Teile 24', '42522942', 'Anlasser 24V / 4,8kW', 'Monark', 161.63)], None))
PN: 69502571 - 21.98 seconds
([('Europart', '69502571', 'Starter', 'Starter AT - 12V/2.3kW LRS01471 pf. Iveco LUCAS', 'No brand specified', 131.5), ('Europart', '69502571', 'Starter', 'Starter AT 12V/2.3kW 0 986 018 950 pf. Iveco BOSCH', 'bosch', 162.8), ('Europart', '69502571', 'Starter', 'Starter 12V/3kW 9Z. 20513043 pf.0986018950 PRESTOLITE', 'prestolite', 178.1), ('Europart', '69502571', 'Starter', 'Starter 12V 2.3 kW 8EA 011 610-111 f.IVECO/ RVI Hella ValueFit', 'hella', 177.27)], [('Winkler', '69502571', 'Anlasser, AT, 12V, 2,3kW', 'Bosch', 177.6, None)], ([('LKW-Teile 24', '69502571', 'Anlasser', 'DT Spare Parts', 134.59), ('LKW-Teile 24', '69502571', 'Starter VEMO V27-12-18950', 'VEMO', 109.76), ('LKW-Teile 24', '69502571', 'Anlasser', 'auger', 150.85), ('LKW-Teile 24', '69502571', '+Line Anlasser für Iveco 2.5 kw 0001109306', '+Line', 70.59), ('LKW-Teile 24', '69502571', 'SEG Neu Anlasser für Iveco 2.5 kw 0001109306', 'SEG Neu', 123.0)], ('69502571', 'Daily IV 2006-2009 | Daily IV EcoDaily 2009-2011 | Daily V 2011-2014 | Daily VI 2014-')))
PN: 79014078 - 18.43 seconds
([('Europart', '79014078', 'Generator', 'Generator 24V/55A mRS-1N 860576 PRESTOLITE', 'prestolite', 172.98)], [], ([('LKW-Teile 24', '79014078', 'Generator', 'DT Spare Parts', 150.46)], ('79014078', 'EuroStar 1993-2002 | EuroTech 1992-2002 | Eurocargo (tbd) | MK 1975-1992 | P 1979-1993 | PA 1979-1993 | EuroTrakker 1993-2004 | TurboStar 1984-1993 | TurboTech 1984-1993 | Zeta 1979-1992')))
PN: 99432760 - 20.82 seconds
([('Europart', '99432760', 'Starter', 'Starter NEU 24V 4.5KW 10Z. 8EA 012 586-001 CS1383 Ef.8EA738258111', 'hella', 231.55), ('Europart', '99432760', 'Starter', 'Starter 24V/4.5kW 10Z. 20540102 PRESTOLITE Bosch System', 'prestolite', 271.7), ('Europart', '99432760', 'Starter', 'Starter AT 24V/4.5KW 0 986 022 940 f.Iveco/Mitsub. BOSCH', 'bosch', 370.2)], [('Winkler', '99432760', 'Anlasser, AT, 24V, 4,5kW', 'Bosch', 370.2, None), ('Winkler', '99432760', 'Anlasser, 24V, 4,5kW', 'Prestolite', 413.0, None)], ([('LKW-Teile 24', '99432760', 'Anlasser', 'DT Spare Parts', 181.29), ('LKW-Teile 24', '99432760', 'Anlasser Spannung [V] 24 ', 'PE Automotive', 178.24), ('LKW-Teile 24', '99432760', '+Line Anlasser für Iveco 4.5 kw F042000117', '+Line', 129.55), ('LKW-Teile 24', '99432760', 'Anlasser 24V / 5,0kW', 'Monark', 246.08), ('LKW-Teile 24', '99432760', 'Anlasser 24V / 4,5kW', 'Monark', 163.68)], ('99432760', 'Citelis 2005-2013 | CityClass 1996-2007 | Crossway 2006- | EuroClass 1993-2007 | EuroMover 1998-2005 | EuroRider 2001-2013 | EuroTech 1992-2002 | Evadys 2005- | Stralis (tbd) | EuroTrakker 1993-2004 | Trakker (tbd)')))
Anbieterobject
Teilenummerobject
0
LKW-Teile 24
2995046
1
Europart
2995138
2
Europart
2995138