Skip to content

Commit

Permalink
add threading to scrape data
Browse files Browse the repository at this point in the history
  • Loading branch information
mrzaizai2k committed Mar 23, 2024
1 parent 201766d commit ca5925d
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 3 deletions.
2 changes: 1 addition & 1 deletion notebook/filter-stocks.ipynb

Large diffs are not rendered by default.

38 changes: 36 additions & 2 deletions src/trading_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
import selenium
import shutil
import time
from functools import partial

from datetime import datetime,timedelta
from typing import Literal, Optional

from bs4 import BeautifulSoup
from requests.exceptions import ConnectionError

from src.Utils.utils import *


from selenium import webdriver
Expand All @@ -33,6 +34,7 @@
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import concurrent.futures



Expand Down Expand Up @@ -556,7 +558,7 @@ def convert_xls_csv(self, src_file_path ="data/LichSuKhopLenh_058C647873.xls"):
def close_browser(self):
    """Shut down the Selenium WebDriver session, closing every browser window.

    After this call ``self.driver`` can no longer be used for scraping;
    a new scraper instance must be created to resume.
    """
    self.driver.quit()


@timeit
def scrape_trading_data(user_name, password, max_retries=3, wait_time=60):
retries = 0
scraper = TradeScraper(user_name, password)
Expand All @@ -576,3 +578,35 @@ def scrape_trading_data(user_name, password, max_retries=3, wait_time=60):
retries += 1
else:
print(f"Max retries reached. Unable to establish a connection after {max_retries} attempts.")


@timeit
def scrape_trading_data(user_name, password, max_retries=3, wait_time=60):
    """Scrape the FPTS trading reports concurrently on one browser session.

    Submits the three report types ('TradeLog', 'reportprofitloss',
    'AssetReport2') to a thread pool sharing a single TradeScraper session,
    and retries the whole batch on ConnectionError.

    Args:
        user_name: FPTS account user name.
        password: FPTS account password.
        max_retries: Maximum number of attempts after a ConnectionError.
        wait_time: Seconds to sleep between retry attempts.
    """
    def scrape_report(report_type):
        # Each worker thread scrapes one report type on the shared session.
        scraper.scrape_fpts_trading_log(report_type=report_type)

    report_types = ['TradeLog', 'reportprofitloss', 'AssetReport2']
    retries = 0
    scraper = TradeScraper(user_name, password)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        while retries < max_retries:
            try:
                # submit() passes the argument directly; no partial() needed.
                futures = [executor.submit(scrape_report, report_type)
                           for report_type in report_types]
                # BUG FIX: concurrent.futures.wait() never re-raises worker
                # exceptions, so a ConnectionError inside a thread was
                # silently swallowed and the retry branch below could never
                # run. Future.result() re-raises it in this thread.
                for future in concurrent.futures.as_completed(futures):
                    future.result()
                print('Finish scraping financial report!')
                scraper.close_browser()
                break  # All reports scraped successfully.
            except ConnectionError as e:
                print(f"Error: {e}")
                print(f"Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
                retries += 1
        else:
            # while-else: runs only when the loop exhausts without break.
            print(f"Max retries reached. Unable to establish a connection after {max_retries} attempts.")


if __name__ == "__main__":
    # Credentials are read from the environment; never hard-code them here.
    TRADE_USER = os.getenv('TRADE_USER')
    TRADE_PASS = os.getenv('TRADE_PASS')
    # BUG FIX: the script called scrape_trading_data_async, a name that is
    # never defined in this module, so running it raised NameError. The
    # threaded implementation is named scrape_trading_data.
    scrape_trading_data(user_name=TRADE_USER, password=TRADE_PASS)

0 comments on commit ca5925d

Please sign in to comment.