From 0096b26517f449ccaef0401bf19f753217f46154 Mon Sep 17 00:00:00 2001 From: PalaVenkiReddy Date: Fri, 24 Feb 2023 01:46:12 +0530 Subject: [PATCH 1/3] Added Repository Details --- index.html | 16 +++++ src/scrape_up/github/organization.py | 104 +++++++++++++++++++++++++-- 2 files changed, 114 insertions(+), 6 deletions(-) create mode 100644 index.html diff --git a/index.html b/index.html new file mode 100644 index 00000000..913e4d2a --- /dev/null +++ b/index.html @@ -0,0 +1,16 @@ + + + + HTML Boilerplate + + + + + +

Hello, world!

+ + diff --git a/src/scrape_up/github/organization.py b/src/scrape_up/github/organization.py index 4b66462e..b37fe4ff 100644 --- a/src/scrape_up/github/organization.py +++ b/src/scrape_up/github/organization.py @@ -62,23 +62,49 @@ def avatar(self): except: return "No avatar found for this organization" - def __scrape_repositories(self): + def __scrape_repositories_page(self): """ - scrapes the repositories page of an organization + scrapes the head page of repositories of an organization """ organization = self.organization data = requests.get(f"https://github.com/orgs/{organization}/repositories") + data = BeautifulSoup(data.text, "html.parser") + return data + + def __scrape_repositories(self, page): + """ + scrapes the repositories page of an organization + """ + data = requests.get(page) + data = BeautifulSoup(data.text, "html.parser") + return data def repositories(self): """ Returns List of repositories of an organization """ - page = self.__scrape_repositories() + organization = self.organization + data = self.__scrape_repositories_page() try: - repositories_body = page.find('div', id = 'org-repositories') + pages_body = data.find('div', class_='paginate-container') + current_page = pages_body.find('em', class_='current') + total_pages = 1 + if current_page != None: + total_pages = (int)(current_page['data-total-pages']) + + pages = [] + if total_pages == 1: + pages.append(f"https://github.com/orgs/{organization}/repositories") + else: + for i in range(1, total_pages + 1): + pages.append(f"https://github.com/orgs/{organization}/repositories?page={i}") + repositories = [] - for repo in repositories_body.find_all('a', attrs = {'itemprop': 'name codeRepository'}): - repositories.append(repo.text.strip()) + for page in pages: + page_data = self.__scrape_repositories(page) + repositories_body = page_data.find('div', id = 'org-repositories') + for repo in repositories_body.find_all('a', attrs = {'itemprop': 'name codeRepository'}): + repositories.append(repo.text.strip()) return repositories except: @@ -165,5 +191,71 @@ def peoples(self): except: return "No people found for this organization" + def repository_stats(self, repo_url): + """ + Returns the stats of a repository + """ + data = self.__scrape_repositories(repo_url) + try: + # forks + forksCount = ( + data.find("span", id="repo-network-counter").text.strip() + ) + # stars + starCount = ( + data.find('span', id = 'repo-stars-counter-star').text.strip() + ) + # issues + issuesCount = ( + data.find("span", id= "issues-repo-tab-count").text.strip() + ) + # pull requests + pullRequests = ( + data.find("span", id="pull-requests-repo-tab-count").text.strip() + ) + + return forksCount, starCount, issuesCount, pullRequests + except: + return "No such repository found" + + def repository_details(self): + """ + Returns the details of all the repositories of an organization + """ + organization = self.organization + data = self.__scrape_repositories_page() + try: + pages_body = data.find('div', class_='paginate-container') + current_page = pages_body.find('em', class_='current') + total_pages = 1 + if current_page != None: + total_pages = (int)(current_page['data-total-pages']) + + pages = [] + if total_pages == 1: + pages.append(f"https://github.com/orgs/{organization}/repositories") + else: + for i in range(1, total_pages + 1): + pages.append(f"https://github.com/orgs/{organization}/repositories?page={i}") + + repositories = [] + for page in pages: + page_data = self.__scrape_repositories(page) + repositories_body = page_data.find('div', id = 'org-repositories') + for repo in repositories_body.find_all('li'): + repo_name = repo.find('a', attrs = {'itemprop': 'name codeRepository'}).text.strip() + repo_url = f"https://github.com{repo.find('a', attrs = {'itemprop': 'name codeRepository'})['href']}" + repo_description_body = repo.find('p', attrs = {'itemprop': 'description'}) + repo_description = repo_description_body.text.strip() if repo_description_body != None else "No description" + repo_language_body = repo.find('span', attrs = {'itemprop': 'programmingLanguage'}) + repo_language = repo_language_body.text.strip() if repo_language_body != None else "No language" + repo_forks, repo_stars, repo_issues, repo_pull_requests = self.repository_stats(repo_url) + repositories.append({"name": repo_name, "url": repo_url, "description": repo_description, "language": repo_language, "forks": repo_forks, "stars": repo_stars, "issues": repo_issues, "pull_requests": repo_pull_requests}) + + return repositories + except: + return "No repositories found for this organization" +org = Organization("Clueless-Community") +print(org.repository_details()) \ No newline at end of file From c4ebdd55843e90176e50d1cf7ec86906324bb95b Mon Sep 17 00:00:00 2001 From: PalaVenkiReddy Date: Fri, 24 Feb 2023 01:48:24 +0530 Subject: [PATCH 2/3] Added Repository Details --- documentation.md | 1 + index.html | 16 ---------------- 2 files changed, 1 insertion(+), 16 deletions(-) delete mode 100644 index.html diff --git a/documentation.md b/documentation.md index e9dc7038..4a8eb545 100644 --- a/documentation.md +++ b/documentation.md @@ -129,3 +129,4 @@ repository = github.Organization(organization_name="Clueless-Community") | `.repositories()` | Returns the list of repositories of an organization. | | `.people()` | Returns the list of people in an organization. | | `.peoples() ` | Returns the number of people in a organization. | +| `.repository_details()` | Returns the list of repositories with their details. | diff --git a/index.html b/index.html deleted file mode 100644 index 913e4d2a..00000000 --- a/index.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - HTML Boilerplate - - - - - -

Hello, world!

- - From 1037797a7131c5037d2aac26c80e4e5d64d55af0 Mon Sep 17 00:00:00 2001 From: PalaVenkiReddy Date: Fri, 24 Feb 2023 01:54:36 +0530 Subject: [PATCH 3/3] Modified documentation.md file --- src/scrape_up/github/organization.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/scrape_up/github/organization.py b/src/scrape_up/github/organization.py index b37fe4ff..635aefbf 100644 --- a/src/scrape_up/github/organization.py +++ b/src/scrape_up/github/organization.py @@ -256,6 +256,3 @@ def repository_details(self): return repositories except: return "No repositories found for this organization" - -org = Organization("Clueless-Community") -print(org.repository_details()) \ No newline at end of file