diff --git a/dev-documentation.md b/dev-documentation.md index 2673a397..171534b1 100644 --- a/dev-documentation.md +++ b/dev-documentation.md @@ -1616,3 +1616,20 @@ First create an object of class `Dictionary`. | `.word_of_the_day_definition()` | Returns the definition of the word of the day. | --- + +## Indeed + +First create an object of class `Indeed`. +```python +from scrape_up import indeed + +indeed_job = indeed.get_url(position="business manager", location="Geneva") +indeed_job.get_record() +``` + + | Methods | Details | + | ---------------------- | ------------------------------------------------------------------------------------------------ | + | `.get_url()` | Returns the URL of the job having a specific position and location. | + | `.get_record()` | Returns the company details like job title, company name, location, job post date, and salary. | + +--- diff --git a/documentation.md b/documentation.md index 9bb5230c..45508751 100644 --- a/documentation.md +++ b/documentation.md @@ -733,3 +733,21 @@ boxoffice = imdb.BoxOffice() | Methods | Details | | --------------- | ------------------------------------------------------------------------------ | | `.top_movies()` | Returns the top box office movies, weekend and total gross, and weeks released.| + + +#### Indeed + +Create an object of class `Indeed`. +```python +from scrape_up import indeed + +indeed_job = indeed.get_url(position="business manager", location="Geneva") +indeed_job.get_record() +``` + + | Methods | Details | + | ---------------------- | ------------------------------------------------------------------------------------------------ | + | `.get_url()` | Returns the URL of the job having a specific position and location. | + | `.get_record()` | Returns the company details like job title, company name, location, job post date, and salary. 
|
+
+---
diff --git a/src/scrape_up/Indeed/__init__.py b/src/scrape_up/Indeed/__init__.py
new file mode 100644
index 00000000..d8c6ca60
--- /dev/null
+++ b/src/scrape_up/Indeed/__init__.py
@@ -0,0 +1,2 @@
+from .indeed import Indeed
+__all__= ["Indeed"]
diff --git a/src/scrape_up/Indeed/indeed.py b/src/scrape_up/Indeed/indeed.py
new file mode 100644
index 00000000..480964dd
--- /dev/null
+++ b/src/scrape_up/Indeed/indeed.py
@@ -0,0 +1,105 @@
+import csv
+from datetime import datetime
+from urllib.parse import urlencode
+
+import requests
+from bs4 import BeautifulSoup
+
+BASE_URL = 'https://indeed.com'
+SEARCH_URL = 'https://www.indeed.com/jobs'
+REQUEST_TIMEOUT = 30  # seconds; requests never times out by default
+
+
+class Indeed:
+    """
+    Create an instance of `Indeed` class.
+    ```python
+    indeed = Indeed()
+    ```
+    | Methods         | Details                                                                                         |
+    | --------------- | ----------------------------------------------------------------------------------------------- |
+    | `.get_url()`    | Returns the search URL for a specific position and location.                                    |
+    | `.get_record()` | Returns the job details like job title, company name, location, job post date, and salary.     |
+    | `.main()`       | Scrapes every result page and writes the records to `<position>-<location>.csv`.                |
+    """
+
+    def __init__(self, position=None, location=None):
+        # Optional so that `Indeed()` works as shown in the docstring.
+        self.position = position
+        self.location = location
+
+    def get_url(self, position, location):
+        """Return the Indeed search URL for `position` in `location`."""
+        # urlencode escapes spaces and reserved characters in the query.
+        query = urlencode({'q': position, 'l': location})
+        return f'{SEARCH_URL}?{query}'
+
+    @staticmethod
+    def _text(card, tag, css_class):
+        """Return the stripped text of the first match, or '' if absent."""
+        node = card.find(tag, css_class)
+        return node.text.strip() if node is not None else ''
+
+    def get_record(self, card):
+        """
+        Extract one job record from a search-result card.
+
+        Returns a tuple (job_title, job_url, location, company, posted_date,
+        extraction_date, summary, salary). Fields missing from the card are
+        returned as '' instead of raising.
+        """
+        link = card.h2.a if card.h2 is not None else None
+        title_span = link.span if link is not None else None
+        job_title = ''
+        if title_span is not None:
+            job_title = title_span.get('title', '')
+        href = link.get('href') if link is not None else None
+        job_url = BASE_URL + href if href else ''
+
+        today = datetime.today().strftime('%Y-%m-%d')
+        return (
+            job_title,
+            job_url,
+            self._text(card, 'div', 'companyLocation'),
+            self._text(card, 'span', 'companyName'),
+            self._text(card, 'span', 'date'),
+            today,
+            self._text(card, 'div', 'job-snippet'),
+            self._text(card, 'div', 'metadata estimated-salary-container'),
+        )
+
+    def main(self, position, location):
+        """
+        Scrape every result page and save the records as a CSV file.
+
+        The file is written to `<position>-<location>.csv` in the current
+        directory. Returns the list of scraped records.
+        """
+        records = []
+        url = self.get_url(position, location)
+
+        while url:
+            response = requests.get(url, timeout=REQUEST_TIMEOUT)
+            soup = BeautifulSoup(response.text, 'html.parser')
+            cards = soup.find_all('div', 'job_seen_beacon')
+            records.extend(self.get_record(card) for card in cards)
+
+            # Follow pagination until the page has no "Next" link.
+            next_link = soup.find('a', {'aria-label': 'Next'})
+            href = next_link.get('href') if next_link is not None else None
+            url = BASE_URL + href if href else None
+
+        filename = f'{position}-{location}.csv'
+        with open(filename, 'w', newline='', encoding='utf-8') as f:
+            writer = csv.writer(f)
+            writer.writerow([
+                'Job_Title', 'Job_Url', 'Location', 'Company',
+                'Post_Date', 'Extraction_Date', 'Summary', 'Salary',
+            ])
+            writer.writerows(records)
+        return records
+
+
+if __name__ == '__main__':
+    # Demo: create a CSV file of business-manager jobs in Geneva.
+    Indeed().main('business manager', 'Geneva')