Split scraper into a class with methods; add a headless parameter for debugging; output is working correctly

This commit is contained in:
jethro 2026-05-13 16:31:12 +12:00
parent 6177fb351f
commit 431b557962
2 changed files with 30 additions and 17 deletions

View file

@ -0,0 +1,9 @@
from scraper import Scraper
from playwright.sync_api import sync_playwright, Playwright
def main() -> None:
    """Log in with a headless Scraper and print the balance and transactions."""
    # The context manager stops the Playwright driver on exit; a bare
    # sync_playwright().start() would need a matching stop() call.
    with sync_playwright() as playwright:
        scraper = Scraper(playwright, True)
        try:
            print(scraper.get_balance())
            print(scraper.get_transactions())
        finally:
            # Close the browser even when one of the lookups above raises.
            scraper.close()


if __name__ == "__main__":
    main()

View file

@ -2,27 +2,31 @@ from playwright.sync_api import sync_playwright, Playwright
import os import os
from dotenv import load_dotenv from dotenv import load_dotenv
load_dotenv() load_dotenv()
class Scraper:
    """Logged-in browser session used to read account data from the site.

    Constructing a ``Scraper`` launches Firefox, opens the page named by the
    ``URL`` environment variable and submits the login form with the
    ``USERNAME`` and ``PASSWORD`` environment variables. The browser stays
    open until ``close()`` is called or the ``with`` block exits.
    """

    # CSS selectors for the login form controls.
    _LOGIN_ID_INPUT = "input#ctl00_ContentPlaceHolder1_txtLoginID"
    _PASSWORD_INPUT = "input#ctl00_ContentPlaceHolder1_txtPassword"
    _LOGIN_BUTTON = "input#ctl00_ContentPlaceHolder1_btnLogin"

    # Absolute XPaths used after login. They are purely positional, so any
    # change to the page layout will break them.
    _BALANCE_XPATH = (
        "xpath=/html/body/form/div[3]/div[3]/div[2]/div[3]/div[5]/span[2]"
    )
    _ACCOUNT_LINK_XPATH = (
        "xpath=/html/body/form/div[3]/div[3]/div[2]/div[3]/div[1]/span[2]/a"
    )
    _TRANSACTIONS_XPATH = (
        "xpath=/html/body/form/div[3]/div[3]/div[2]/div/div[2]/div[3]/table/tbody"
    )

    def __init__(self, playwright: "Playwright", headless: bool = True):
        """Launch Firefox and log in.

        Args:
            playwright: A started Playwright instance. It is only used to
                launch the browser; this class never stops it.
            headless: Pass ``False`` to show the browser window while
                debugging.

        Raises:
            RuntimeError: If ``URL``, ``USERNAME`` or ``PASSWORD`` is not set.
        """
        # Resolve configuration before launching anything, so a missing
        # variable cannot leave a browser process behind.
        url = self._require_env("URL")
        # NOTE(review): USERNAME is often pre-set by the OS or shell (notably
        # on Windows) and load_dotenv() does not override existing variables
        # by default -- confirm the value from .env is the one used here.
        username = self._require_env("USERNAME")
        password = self._require_env("PASSWORD")

        self.playwright = playwright
        self.firefox = self.playwright.firefox
        self.browser = self.firefox.launch(headless=headless)
        try:
            self.page = self.browser.new_page()
            self.response = self.page.goto(url)
            self.page.fill(self._LOGIN_ID_INPUT, username)
            self.page.fill(self._PASSWORD_INPUT, password)
            self.page.click(self._LOGIN_BUTTON)
        except Exception:
            # When __init__ raises the caller never receives the instance,
            # so nobody else could close the browser that was just launched.
            self.browser.close()
            raise

    @staticmethod
    def _require_env(name: str) -> str:
        """Return environment variable *name*, raising if it is unset."""
        value = os.getenv(name)
        if value is None:
            raise RuntimeError(f"environment variable {name} is not set")
        return value

    def __enter__(self) -> "Scraper":
        """Return the scraper itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the browser when the ``with`` block exits."""
        self.close()

    def get_balance(self) -> str:
        """Return the inner text of the balance element on the current page."""
        return self.page.locator(self._BALANCE_XPATH).inner_text()

    def get_transactions(self) -> str:
        """Click the account link and return the transaction table body text.

        NOTE(review): the click presumably navigates to another page, in
        which case get_balance() may not find its element afterwards --
        confirm against the live site.
        """
        self.page.click(self._ACCOUNT_LINK_XPATH)
        return self.page.locator(self._TRANSACTIONS_XPATH).inner_text()

    def close(self) -> None:
        """Close the browser launched by this scraper."""
        self.browser.close()