Split the scraper into a class with methods and add a headless parameter for debugging; output is working correctly
This commit is contained in:
parent
6177fb351f
commit
431b557962
|
|
@ -0,0 +1,9 @@
|
||||||
|
from scraper import Scraper
|
||||||
|
from playwright.sync_api import sync_playwright, Playwright
|
||||||
|
|
||||||
|
# Use the context-manager form of sync_playwright() so the Playwright driver
# is stopped when the block exits; the bare .start() call used previously was
# never paired with a .stop().
with sync_playwright() as playwright:
    # Second positional argument is `headless`; pass False to watch the
    # browser while debugging.
    scraper = Scraper(playwright, True)
    try:
        print(scraper.get_balance())
        print(scraper.get_transactions())
    finally:
        # Close the browser even if one of the scraping calls raises.
        scraper.close()
|
||||||
|
|
||||||
|
|
@ -2,27 +2,31 @@ from playwright.sync_api import sync_playwright, Playwright
|
||||||
import os
|
import os
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
class Scraper:
    """Log in to the site named by the ``URL`` environment variable and read
    values off its pages using a Playwright-driven Firefox browser.

    The login happens in ``__init__``; afterwards ``get_balance`` and
    ``get_transactions`` read text from the logged-in page. Call ``close``
    (or use the instance as a context manager) to shut the browser down.

    Configuration is read from the environment variables ``URL``,
    ``USERNAME`` and ``PASSWORD``.
    NOTE(review): some operating systems/shells already define ``USERNAME``;
    ``load_dotenv()`` does not override existing variables by default, so the
    value from ``.env`` may be ignored there — confirm on the target machine.
    """

    # CSS selectors for the login form controls.
    _LOGIN_ID_INPUT = "input#ctl00_ContentPlaceHolder1_txtLoginID"
    _PASSWORD_INPUT = "input#ctl00_ContentPlaceHolder1_txtPassword"
    _LOGIN_BUTTON = "input#ctl00_ContentPlaceHolder1_btnLogin"

    # Absolute XPaths into the logged-in pages. They are tied to the exact
    # page layout and will need updating if the site's markup changes.
    _BALANCE_XPATH = "xpath=/html/body/form/div[3]/div[3]/div[2]/div[3]/div[5]/span[2]"
    _ACCOUNT_LINK_XPATH = "xpath=/html/body/form/div[3]/div[3]/div[2]/div[3]/div[1]/span[2]/a"
    _TRANSACTIONS_XPATH = "xpath=/html/body/form/div[3]/div[3]/div[2]/div/div[2]/div[3]/table/tbody"

    # Environment variables that must be set (and non-empty) before login.
    _REQUIRED_ENV = ("URL", "USERNAME", "PASSWORD")

    def __init__(self, playwright: "Playwright", headless: bool = True):
        """Launch Firefox, open the login page and submit the credentials.

        Args:
            playwright: A started Playwright instance (sync API).
            headless: Run the browser without a window. Pass ``False`` to
                watch the browser while debugging.

        Raises:
            RuntimeError: If any of ``URL``, ``USERNAME`` or ``PASSWORD`` is
                unset or empty.
        """
        # Validate configuration before launching anything, so a missing
        # variable fails fast with a clear message instead of a Playwright
        # error about a ``None`` argument.
        settings = {name: os.getenv(name) for name in self._REQUIRED_ENV}
        missing = [name for name, value in settings.items() if not value]
        if missing:
            raise RuntimeError(
                "Missing required environment variable(s): " + ", ".join(missing)
            )

        self.playwright = playwright
        self.firefox = self.playwright.firefox
        self.browser = self.firefox.launch(headless=headless)
        try:
            self.page = self.browser.new_page()
            self.response = self.page.goto(settings["URL"])
            self.page.fill(self._LOGIN_ID_INPUT, settings["USERNAME"])
            self.page.fill(self._PASSWORD_INPUT, settings["PASSWORD"])
            self.page.click(self._LOGIN_BUTTON)
        except Exception:
            # Do not leave a browser process running if navigation or login
            # fails part-way through; re-raise the original error unchanged.
            self.browser.close()
            raise

    def __enter__(self):
        """Return the scraper itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the browser on leaving the ``with`` block; never suppresses errors."""
        self.close()

    def get_balance(self) -> str:
        """Return the inner text of the balance element on the current page."""
        return self.page.locator(self._BALANCE_XPATH).inner_text()

    def get_transactions(self) -> str:
        """Click through to the account page and return the transaction table text.

        The result is the raw inner text of the table's ``<tbody>``; no
        parsing into rows or columns is done here.
        """
        self.page.click(self._ACCOUNT_LINK_XPATH)
        return self.page.locator(self._TRANSACTIONS_XPATH).inner_text()

    def close(self):
        """Close the browser that was launched in ``__init__``."""
        self.browser.close()
|
||||||
|
|
||||||
|
#xpathbody=/html/body/form/div[3]/div[3]/div[2]/div/div[2]/div[3]/table/tbody
|
||||||
|
#xpathaccountbutton = /html/body/form/div[3]/div[3]/div[2]/div[3]/div[1]/span[2]/a
|
||||||
#xpath = /html/body/form/div[3]/div[3]/div[2]/div[3]/div[5]/span[2]
|
#xpath = /html/body/form/div[3]/div[3]/div[2]/div[3]/div[5]/span[2]
|
||||||
|
|
||||||
with sync_playwright() as playwright:
|
|
||||||
run(playwright)
|
|
||||||
Loading…
Reference in a new issue