# Install the Autoscraper
!pip install autoscraper
# Example 1: learn scraping rules from a GitHub repositories page.
from autoscraper import AutoScraper

url = 'https://github.com/SurendraRedd?tab=repositories'
# Sample text handed to AutoScraper as the wanted content on the page.
fetch_list = ['Notebooks']

scraper = AutoScraper()
result = scraper.build(url=url, wanted_list=fetch_list)

# Print explicitly: a bare expression is only displayed when it is the last
# line of a notebook cell and is silently dropped when run as a script.
print(scraper.get_result_similar(url, grouped=True))
# Example 2: learn rules on one Yahoo Finance quote page and reuse them on
# another.
from autoscraper import AutoScraper

url = 'https://finance.yahoo.com/quote/AAPL/?guccounter=1'
# NOTE: the price sample presumably has to match the value currently shown on
# the page, so refresh it before running.
fetch_list = ['Previous Close', '131.94']

scraper = AutoScraper()
result = scraper.build(url=url, wanted_list=fetch_list)
grouped_result = scraper.get_result_similar(url, grouped=True)
print(grouped_result)

# Save the rules and search.
# Rule ids change on every execution, so they are looked up from the grouped
# result instead of being hard-coded: for each sample, take the first rule
# whose matches contain it.
rule_aliases = {}
for sample, alias in (('131.94', 'price'), ('Previous Close', 'Names')):
    rule_id = next(
        (rid for rid, matches in grouped_result.items() if sample in matches),
        None,
    )
    if rule_id is None:
        print(f'No rule matched {sample!r}; update fetch_list with text '
              'currently shown on the page.')
    else:
        rule_aliases[rule_id] = alias

scraper.set_rule_aliases(rule_aliases)
scraper.keep_rules(list(rule_aliases))
scraper.save('AppleSearch')

# Try the above rules on another URL.
Amz_Url = 'https://finance.yahoo.com/quote/AMZN?p=AMZN'
print(scraper.get_result_similar(Amz_Url, group_by_alias=True))
# Example: Amazon laptop search (learn rules on the Dell results page, then
# reuse them on the HP results page).
from autoscraper import AutoScraper

url = 'https://www.amazon.in/s?k=dell+laptops&ref=nb_sb_noss_2'
# NOTE: these samples presumably have to match text currently shown on the
# results page, so refresh them before running.
fetch_list = ['₹51,499','Dell Inspiron 5406 14" FHD Display 2in1 Laptop (11th Gen i3-1115G4 / 4GB / 256GB SSD / Integrated Graphics/ Win 10 + MS Office / Backlit with FPR / Platinum Silver) D560446WIN9S','10']

scraper = AutoScraper()
result = scraper.build(url=url, wanted_list=fetch_list)
grouped_result = scraper.get_result_similar(url, grouped=True)
print(grouped_result)
# Note: sometimes duplicate rule values will be displayed.

# Save the rules and search.
# Rule ids change on every execution, so they are looked up from the grouped
# result instead of being hard-coded: for each sample, take the first rule
# whose matches contain it.
rule_aliases = {}
for sample, alias in ((fetch_list[0], 'price'), (fetch_list[1], 'Names')):
    rule_id = next(
        (rid for rid, matches in grouped_result.items() if sample in matches),
        None,
    )
    if rule_id is None:
        print(f'No rule matched {sample!r}; update fetch_list with text '
              'currently shown on the page.')
    else:
        rule_aliases[rule_id] = alias

scraper.set_rule_aliases(rule_aliases)
scraper.keep_rules(list(rule_aliases))
scraper.save('DellLaptopSearch')

# Try the above rules on another URL.
HP_Url = 'https://www.amazon.in/s?k=HP+laptops&ref=nb_sb_noss_2'
print(scraper.get_result_similar(HP_Url, group_by_alias=True))
# Example 3: learn a rule from one Stack Overflow question page and reuse it
# on another question page.
from autoscraper import AutoScraper

url = 'https://stackoverflow.com/questions/2081586/web-scraping-with-python'
# We can add one or multiple candidates here.
# You can also put urls here to retrieve urls.
wanted_list = ["What are metaclasses in Python?"]

scraper = AutoScraper()
result = scraper.build(url, wanted_list)
print(result)
# Output recorded from an earlier run (kept as a comment; it was previously a
# bare list expression that did nothing when executed):
# ['How to execute a program or call a system command from Python',
#  'What are metaclasses in Python?',
#  'Does Python have a ternary conditional operator?',
#  'Convert bytes to a string',
#  "Does Python have a string 'contains' substring method?",
#  'How to check version of python modules?']

# Print explicitly so the result is visible outside a notebook as well.
print(scraper.get_result_similar('https://stackoverflow.com/questions/606191/convert-bytes-to-a-string'))