-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* First update on justjoinit scrapper * Update runner * Fix nofluffjobs scrapper * Fix bulldogjob scrapper * Reenable tests
- Loading branch information
Showing
10 changed files
with
128 additions
and
75 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import requests | ||
from modules.base_logger import log | ||
from modules.common import updateExcel | ||
|
||
class JustJoinIt():
    """Collect job offers from the justjoin.it offers API.

    Matching offers are stored in ``jobs_dict``, keyed by offer URL. Each
    value is a dict with ``Title``, ``Company``, ``Salary`` and ``Location``
    keys, every one holding a single-element list.
    """

    def __init__(self):
        self.jobs_dict = {}

    def updateJobsDict(self):
        """Request the offers listing from the API.

        Returns:
            The ``requests`` response object, or ``None`` when the request
            raised (the exception is printed, not propagated).
        """
        url = 'https://justjoin.it/api/offers'
        headers = {
            "content-type": "application/json, text/plain",
            "User-Agent": (
                "Mozilla/5.0 (X11; Linux x86_64; rv:57.0) "
                "Gecko/20100101 Firefox/57.0"
            ),
            "Host": "justjoin.it",
            "Referer": "justjoin.it",
        }
        try:
            return requests.get(url, headers=headers, timeout=120)
        except Exception as e:
            # Best-effort fetch: report the failure and let the caller
            # continue with no response.
            print(f"Exception {e} on updateJobsDict.")
            return None

    def prepareJobsDict(self, response, role, lvl, city):
        """Filter the offers in ``response`` and record them in ``jobs_dict``.

        An offer is kept when its ``marker_icon`` is in ``role``, its
        ``experience_level`` is in ``lvl``, it is either remote or located
        in one of ``city``, and ``display_offer`` is not explicitly False.

        Args:
            response: Object whose ``json()`` returns an iterable of offer
                dicts, or ``None`` (as returned by ``updateJobsDict`` on
                failure), in which case nothing is recorded.
            role: Container of accepted ``marker_icon`` values.
            lvl: Container of accepted ``experience_level`` values.
            city: Container of accepted ``city`` values for non-remote
                offers.

        Raises:
            KeyError: If an offer that passes every filter has no ``id``.
        """
        if response is None:
            # The fetch failed; there is nothing to filter.
            return

        for offer_dict in response.json():
            if offer_dict.get("marker_icon") not in role:
                continue
            if offer_dict.get("experience_level") not in lvl:
                continue

            # Exact comparison: `x not in ("remote")` was a substring test
            # against the string "remote" and raised TypeError for None.
            is_remote = offer_dict.get("workplace_type") == "remote"
            if not is_remote and offer_dict.get("city") not in city:
                continue
            if offer_dict.get("display_offer") is False:
                continue

            url = f'https://justjoin.it/offers/{offer_dict["id"]}'
            self.jobs_dict[url] = {
                "Title": [offer_dict.get("title")],
                "Company": [offer_dict.get("company_name")],
                "Salary": [offer_dict.get("employment_types")],
                "Location": [offer_dict.get("city")],
            }
|
||
def run(sheetname, role, lvl, city):
    """Fetch justjoin.it offers, filter them and write them to a sheet.

    Args:
        sheetname: Passed through to ``updateExcel`` as the target sheet.
        role: Accepted ``marker_icon`` values, forwarded to
            ``prepareJobsDict``.
        lvl: Accepted ``experience_level`` values, forwarded to
            ``prepareJobsDict``.
        city: Accepted ``city`` values, forwarded to ``prepareJobsDict``.
    """
    log.info("Starting JustJointIt scrapper.")
    just = JustJoinIt()
    resp = just.updateJobsDict()
    # updateJobsDict returns None when the request failed; skip filtering
    # in that case instead of calling .json() on None.
    if resp is not None:
        just.prepareJobsDict(resp, role, lvl, city)
    updateExcel(sheetname, just.jobs_dict)
    log.info("Finished JustJoinIt scrapper.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,3 +5,4 @@ pandas==2.1.4; python_version > '3.8' | |
pytest==7.4.4 | ||
Requests==2.31.0 | ||
pytest-html==4.1.1 | ||
unidecode==1.3.7 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,64 +1,36 @@ | ||
import re | ||
import requests | ||
from bs4 import BeautifulSoup | ||
from modules.base_logger import log | ||
from modules.common import updateExcel | ||
from modules.common import getDomainName, updateExcel | ||
|
||
class JustJoinIt(): | ||
def __init__(self): | ||
self.jobs_dict = {} | ||
|
||
def updateJobsDict(self): | ||
url = 'https://justjoin.it/api/offers' | ||
def updateJobsDict(self, url): | ||
domainName = getDomainName(url) | ||
try: | ||
headers = { | ||
"content-type": "application/json, text/plain", | ||
"User-Agent": ( | ||
"Mozilla/5.0 (X11; Linux x86_64; rv:57.0) " | ||
"Gecko/20100101 Firefox/57.0" | ||
), | ||
"Host": "justjoin.it", | ||
"Referer": "justjoin.it", | ||
} | ||
response = requests.get(url, headers=headers, timeout=120) | ||
return response | ||
page = requests.get(url, timeout=120) | ||
page_soup = BeautifulSoup(page.content, "html.parser") | ||
job_links_list = page_soup.find_all("div", {"class": "css-1iq2gw3"}) | ||
|
||
for job in job_links_list: | ||
job_link = "https://"+domainName+job.find('a', class_='css-4lqp8g')['href'] | ||
job_title = job.find('h2').text | ||
job_company = job.find('div', class_=re.compile("css-ldh1c9", re.I)).text | ||
job_salary = job.find('div', class_=re.compile("css-1b2ga3v", re.I)).text | ||
job_location = job.find('div', class_=re.compile("css-68pppj", re.I)).text | ||
self.jobs_dict[job_link] = {"Title": [job_title], | ||
"Company": [job_company], | ||
"Salary": [job_salary], | ||
"Location": [job_location]} | ||
except Exception as e: | ||
print(f"Exception {e} on updateJobsDict.") | ||
return None | ||
|
||
def prepareJobsDict(self, response, role, lvl, city): | ||
marker_list = [] | ||
city_list = [] | ||
exp_list = [] | ||
|
||
for offer_dict in response.json(): | ||
url = f'https://justjoin.it/offers/{offer_dict["id"]}' | ||
|
||
if offer_dict.get("marker_icon") not in role: | ||
continue | ||
if offer_dict.get("experience_level") not in lvl: | ||
continue | ||
if (offer_dict.get("workplace_type") not in ("remote") and | ||
not (offer_dict.get("workplace_type") not in ("remote") and offer_dict.get("city") in city)): | ||
continue | ||
if offer_dict.get("display_offer") is False: | ||
continue | ||
|
||
job_title = offer_dict.get("title") | ||
job_company = offer_dict.get("company_name") | ||
job_salary = offer_dict.get("employment_types") | ||
job_location = offer_dict.get("city") | ||
|
||
self.jobs_dict[url] = {"Title": [job_title], | ||
"Company": [job_company], | ||
"Salary": [job_salary], | ||
"Location": [job_location]} | ||
marker_list.append(offer_dict.get("marker_icon")) | ||
city_list.append(offer_dict.get("city")) | ||
exp_list.append(offer_dict.get("experience_level")) | ||
|
||
def run(sheetname, role, lvl, city): | ||
def run(sheetname, url): | ||
log.info("Starting JustJointIt scrapper.") | ||
just = JustJoinIt() | ||
resp = just.updateJobsDict() | ||
just.prepareJobsDict(resp, role, lvl, city) | ||
just.updateJobsDict(url) | ||
updateExcel(sheetname, just.jobs_dict) | ||
log.info("Finished JustJoinIt scrapper.") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters