-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathuser.py
120 lines (113 loc) · 3.57 KB
/
user.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import pandas as pd
from utils import FileWriter, GraphQL
class User:
    """Collect GitHub user search results through GraphQL and tabulate them.

    Typical flow: ``fetch()`` pages through the search, ``toDataFrame()``
    flattens the accumulated edges into a pandas DataFrame, and ``saveCSV()``
    hands that frame to ``FileWriter``.
    """

    count = 0

    # GraphQL search template. The two placeholders are the page size (%d)
    # and the already-quoted cursor literal, or ``null`` for the first page (%s).
    query = """
    query {
        search(query: "location:China", type: USER, first:%d, after: %s) {
            userCount
            pageInfo {
                startCursor
                hasNextPage
                hasPreviousPage
                endCursor
            }
            edges {
                cursor
                node {
                    ... on User {
                        name
                        login
                        location
                        url
                        bio
                        contributionsCollection {
                            totalIssueContributions
                            totalCommitContributions
                            totalRepositoryContributions
                            totalPullRequestContributions
                            totalPullRequestReviewContributions
                            totalRepositoriesWithContributedIssues
                        }
                        followers {
                            totalCount
                        }
                        repositories {
                            totalCount
                        }
                        organizations {
                            totalCount
                        }
                        pullRequests {
                            totalCount
                        }
                        starredRepositories {
                            totalCount
                        }
                        status {
                            message
                        }
                        company
                        websiteUrl
                    }
                }
            }
        }
    }
    """

    # Cursor literal substituted into ``query``; "null" requests the first page.
    endCursor = "null"

    # Node fields returned as ``{"totalCount": n}`` that are flattened to ``n``.
    _COUNT_FIELDS = (
        "starredRepositories",
        "followers",
        "pullRequests",
        "organizations",
        "repositories",
    )

    def __init__(self):
        self.count = 0
        self.data = {}      # merged GraphQL responses (first response + later edges)
        self.reform = []    # flattened per-user rows built by preprocessing()
        self.df = None      # DataFrame built by toDataFrame()

    @staticmethod
    def _page_sizes(num, batch_size):
        """Return the page size of each request needed to fetch ``num`` users.

        Args:
            num: Total number of users wanted; values <= 0 need no request.
            batch_size: Maximum number of users per request.

        Returns:
            A list such as ``[10, 10, 5]`` for ``num=25, batch_size=10``.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        num = int(num)
        batch_size = int(batch_size)
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        if num <= 0:
            return []
        full_pages, rest = divmod(num, batch_size)
        sizes = [batch_size] * full_pages
        if rest:
            # The last, partial page asks only for what is still missing.
            sizes.append(rest)
        return sizes

    def fetch(self, num=10, batch_size=10):
        """Fetch up to ``num`` users, ``batch_size`` per request, into ``self.data``.

        Paging resumes from ``self.endCursor``, so repeated calls keep appending
        edges after the last page already fetched. Paging stops early when the
        API reports that there is no further page.

        Args:
            num: Total number of users to request.
            batch_size: Number of users per request.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        page_sizes = self._page_sizes(num, batch_size)
        print("data numbers: %d" % num)
        print("batch_size: %d" % batch_size)
        print("times: %d" % len(page_sizes))

        for request_no, size in enumerate(page_sizes, start=1):
            print("Request #%d" % request_no)
            data = GraphQL.execute(self.query % (size, self.endCursor))
            search = data["search"]
            page_info = search["pageInfo"]

            if self.data:
                merged = self.data["search"]
                merged["edges"] = merged["edges"] + search["edges"]
                # Keep the paging state of the most recent page.
                merged["pageInfo"] = page_info
            else:
                self.data = data
            print("Finshed #%d" % request_no)

            end_cursor = page_info["endCursor"]
            if end_cursor is not None:
                # The cursor is spliced into the query as a quoted string literal.
                self.endCursor = "\"%s\"" % end_cursor
            if end_cursor is None or not page_info.get("hasNextPage", True):
                # Nothing left to page through; further requests would be wasted.
                break

    def preprocessing(self):
        """Flatten ``self.data`` edges into one dict per user in ``self.reform``.

        Each row is a shallow copy of the edge's node with the ``totalCount``
        wrappers replaced by their integer, the edge cursor added under
        ``"cursor"`` and line breaks stripped from ``bio``. ``self.data`` is
        left untouched, so the method can be called repeatedly.
        """
        print("Data preprocessing")
        if not self.data:
            return

        rows = []
        for edge in self.data["search"]["edges"]:
            node = edge["node"]
            if not node:
                # Presumably a non-User hit (only the `... on User` fragment is
                # selected), which has no fields to flatten - TODO confirm.
                continue
            row = dict(node)
            for field in self._COUNT_FIELDS:
                row[field] = node[field]["totalCount"]
            row["cursor"] = edge["cursor"]
            if row.get("bio"):
                row["bio"] = row["bio"].replace('\n', '').replace('\r', '')
            rows.append(row)
        self.reform = rows

    def toDataFrame(self):
        """Preprocess the fetched data and store it as a DataFrame in ``self.df``."""
        self.preprocessing()
        self.df = pd.json_normalize(self.reform)
        print(self.df)

    def saveCSV(self, fileName, mode):
        """Pass ``self.df`` to ``FileWriter.writeFile`` with the given name and mode.

        ``self.df`` is only populated by ``toDataFrame()``; call that first.
        """
        print("Save data")
        FileWriter.writeFile(self.df, fileName, mode)