Skip to content

Commit

Permalink
Merge pull request #135 from PalaVenkiReddy/Repo_details
Browse files Browse the repository at this point in the history
Added Repo details module
  • Loading branch information
nikhil25803 authored Feb 24, 2023
2 parents 2f1ae71 + 1037797 commit cdf37a1
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 6 deletions.
1 change: 1 addition & 0 deletions documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,3 +131,4 @@ repository = github.Organization(organization_name="Clueless-Community")
| `.repositories()` | Returns the list of repositories of an organization. |
| `.people()` | Returns the list of people in an organization. |
| `.peoples() ` | Returns the number of people in an organization. |
| `.repository_details()` | Returns the list of repositories with their details. |
101 changes: 95 additions & 6 deletions src/scrape_up/github/organization.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,23 +62,49 @@ def avatar(self):
except:
return "No avatar found for this organization"

def __scrape_repositories_page(self):
    """
    Fetch and parse the head page of an organization's repository listing.

    Reads ``self.organization`` to build the URL
    ``https://github.com/orgs/<organization>/repositories``.

    Returns:
        The response body parsed by BeautifulSoup with ``html.parser``.
    """
    organization = self.organization
    response = requests.get(f"https://github.com/orgs/{organization}/repositories")
    return BeautifulSoup(response.text, "html.parser")

def __scrape_repositories(self, page):
    """
    Download the given URL and parse its body as HTML.

    Args:
        page: URL to request. Callers in this file pass either a paginated
            organization listing URL or a single repository URL.

    Returns:
        The response body parsed by BeautifulSoup with ``html.parser``.
    """
    response = requests.get(page)
    return BeautifulSoup(response.text, "html.parser")

def repositories(self):
"""
Returns List of repositories of an organization
"""
page = self.__scrape_repositories()
organization = self.organization
data = self.__scrape_repositories_page()
try:
repositories_body = page.find('div', id = 'org-repositories')
pages_body = data.find('div', class_='paginate-container')
current_page = pages_body.find('em', class_='current')
total_pages = 1
if current_page != None:
total_pages = (int)(current_page['data-total-pages'])

pages = []
if total_pages == 1:
pages.append(f"https://github.com/orgs/{organization}/repositories")
else:
for i in range(1, total_pages + 1):
pages.append(f"https://github.com/orgs/{organization}/repositories?page={i}")

repositories = []
for repo in repositories_body.find_all('a', attrs = {'itemprop': 'name codeRepository'}):
repositories.append(repo.text.strip())
for page in pages:
page_data = self.__scrape_repositories(page)
repositories_body = page_data.find('div', id = 'org-repositories')
for repo in repositories_body.find_all('a', attrs = {'itemprop': 'name codeRepository'}):
repositories.append(repo.text.strip())

return repositories
except:
Expand Down Expand Up @@ -165,5 +191,68 @@ def peoples(self):
except:
return "No people found for this organization"

def repository_stats(self, repo_url):
    """
    Return the fork, star, issue and pull-request counters of a repository.

    Args:
        repo_url: URL of the repository page to scrape.

    Returns:
        A 4-tuple ``(forks, stars, issues, pull_requests)`` holding the
        stripped text of each counter element, or the string
        ``"No such repository found"`` when any of the counter elements is
        missing from the page.
    """
    data = self.__scrape_repositories(repo_url)
    # Element ids of the counters, in the order they are returned.
    counter_ids = (
        "repo-network-counter",  # forks
        "repo-stars-counter-star",  # stars
        "issues-repo-tab-count",  # issues
        "pull-requests-repo-tab-count",  # pull requests
    )
    try:
        return tuple(
            data.find("span", id=counter_id).text.strip()
            for counter_id in counter_ids
        )
    except AttributeError:
        # find() returned None for a counter, so ``.text`` failed: the page
        # does not look like a repository page.
        return "No such repository found"


def repository_details(self):
    """
    Return the details of all the repositories of an organization.

    Walks every page of the organization's repository listing and, for each
    repository entry, also scrapes the repository page for its counters via
    ``repository_stats``.

    Returns:
        A list of dicts with the keys ``name``, ``url``, ``description``,
        ``language``, ``forks``, ``stars``, ``issues`` and
        ``pull_requests``. ``description`` and ``language`` fall back to
        ``"No description"`` / ``"No language"``; the four counters are
        ``None`` when ``repository_stats`` could not read them. On any
        scraping error the string
        ``"No repositories found for this organization"`` is returned
        instead of a list.
    """
    organization = self.organization
    data = self.__scrape_repositories_page()
    try:
        base_url = f"https://github.com/orgs/{organization}/repositories"

        # The pagination widget may be absent; treat that as a single page
        # instead of failing the whole listing.
        pages_body = data.find("div", class_="paginate-container")
        current_page = None
        if pages_body is not None:
            current_page = pages_body.find("em", class_="current")
        total_pages = 1
        if current_page is not None:
            total_pages = int(current_page["data-total-pages"])

        if total_pages == 1:
            pages = [base_url]
        else:
            pages = [f"{base_url}?page={i}" for i in range(1, total_pages + 1)]

        repositories = []
        for page in pages:
            page_data = self.__scrape_repositories(page)
            repositories_body = page_data.find("div", id="org-repositories")
            for repo in repositories_body.find_all("li"):
                repo_link = repo.find("a", attrs={"itemprop": "name codeRepository"})
                if repo_link is None:
                    # Not a repository entry (find_all also yields nested
                    # <li> elements); skip it rather than abort.
                    continue
                repo_name = repo_link.text.strip()
                repo_url = f"https://github.com{repo_link['href']}"

                description_body = repo.find("p", attrs={"itemprop": "description"})
                repo_description = (
                    description_body.text.strip()
                    if description_body is not None
                    else "No description"
                )
                language_body = repo.find(
                    "span", attrs={"itemprop": "programmingLanguage"}
                )
                repo_language = (
                    language_body.text.strip()
                    if language_body is not None
                    else "No language"
                )

                # repository_stats returns an error string instead of a
                # tuple on failure; unpacking that string would raise and
                # discard every repository collected so far.
                stats = self.repository_stats(repo_url)
                if not (isinstance(stats, tuple) and len(stats) == 4):
                    stats = (None, None, None, None)
                repo_forks, repo_stars, repo_issues, repo_pull_requests = stats

                repositories.append(
                    {
                        "name": repo_name,
                        "url": repo_url,
                        "description": repo_description,
                        "language": repo_language,
                        "forks": repo_forks,
                        "stars": repo_stars,
                        "issues": repo_issues,
                        "pull_requests": repo_pull_requests,
                    }
                )

        return repositories
    except Exception:
        # Best-effort scraper: network or markup errors are reported with
        # the same message as before, but KeyboardInterrupt/SystemExit are
        # no longer swallowed.
        return "No repositories found for this organization"

0 comments on commit cdf37a1

Please sign in to comment.