Skip to content

Commit

Permalink
feat: change open leaderboard user list to community openrank (X-lab2…
Browse files Browse the repository at this point in the history
…017#1619)

Signed-off-by: frank-zsy <[email protected]>
  • Loading branch information
frank-zsy authored Sep 2, 2024
1 parent 6393822 commit 8bd50cc
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 19 deletions.
66 changes: 66 additions & 0 deletions labeled_data/bot/index.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1487,6 +1487,72 @@ data:
name: bambootest-bot
- id: 87254977
name: ahmar7
- id: 171268329
name: DigitalPlatFoundation
- id: 77073439
name: webkit-commit-queue
- id: 28621316
name: NordicBuilder
- id: 14542386
name: gentoo-bot
- id: 58790826
name: elasticsearchmachine
- id: 10336601
name: daocloudpublic
- id: 92915184
name: airbyteio
- id: 76178356
name: OSBotify
- id: 101608348
name: llamatester
- id: 17281578
name: yugabyte-ci
- id: 45043797
name: nixos-discourse
- id: 47979223
name: rustbot
- id: 110908925
name: bot-gitexp-user
- id: 120460335
name: pix-bot-github
- id: 69588470
name: posthog-bot
- id: 97558700
name: mui-bot
- id: 152529449
name: bot2-harness
- id: 43204447
name: app-sre-bot
- id: 115476073
name: Satellite-QE
- id: 177514479
name: BambulabRobot
- id: 48055056
name: ch-code-analysis
- id: 41093903
name: sysopenci
- id: 137281497
name: bcbuild-github-agent
- id: 115160210
name: cloud-platform-concourse-bot
- id: 58012459
name: llvm-ci
- id: 111915794
name: gravity-ui-bot
- id: 129911861
name: leanprover-community-mathlib4-bot
- id: 37885440
name: metamaskbot
- id: 75649378
name: modernappsninjabot
- id: 58891217
name: bazel-flag-bot
- id: 46904574
name: flinkbot
- id: 39309875
name: spinnakerbot
- id: 40367165
name: asf-ci
- name: Gitee
type: Code Hosting
users:
Expand Down
64 changes: 51 additions & 13 deletions src/cron/tasks/open_leaderboard.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { Task } from '..';
import { getRepoActivity, getRepoOpenrank, getUserActivity, getUserOpenrank } from '../../metrics/indices';
import { getRepoActivity, getRepoOpenrank, getUserActivity, getUserCommunityOpenrank } from '../../metrics/indices';
import { forEveryMonth } from '../../metrics/basic';
import { getLogger, rankData } from '../../utils';
import { getPlatformData } from '../../label_data_utils';

const task: Task = {
cron: '0 0 15 * *', // runs on the 15th day of every month at 00:00
Expand Down Expand Up @@ -321,8 +322,13 @@ const task: Task = {

// get Chinese actor OpenRank
// by month
const chineseUserMonthOpenrankData = await getUserOpenrank({
labelUnion: [':regions/CN'],
const chineseUserLabelData = getPlatformData([':regions/CN']);
const platformData: any = chineseUserLabelData.map(i => ({
platform: i.name,
userIds: i.users.map(u => u.id),
}));
const chineseUserMonthOpenrankData = await getUserCommunityOpenrank({
idOrNames: platformData,
startYear, startMonth, endYear, endMonth,
order: 'DESC', limit: -1, precision: 2,
groupTimeRange: 'month', limitOption: 'each',
Expand All @@ -334,8 +340,8 @@ const task: Task = {
}
writeData(chineseUserMonthOpenrankMap, 'Actor_China_Month', 'open_rank/actor/chinese');
// by year
const chineseUserYearOpenrankData = await getUserOpenrank({
labelUnion: [':regions/CN'],
const chineseUserYearOpenrankData = await getUserCommunityOpenrank({
idOrNames: platformData,
startYear, startMonth, endYear, endMonth,
order: 'DESC', limit: -1, precision: 2,
groupTimeRange: 'year', limitOption: 'each',
Expand All @@ -350,22 +356,54 @@ const task: Task = {

// get global actor OpenRank
// by month
const globalUserMonthOpenrankData = await getUserOpenrank({
startYear, startMonth, endYear, endMonth,
order: 'DESC', limit, precision: 2, limitOption: 'each',
groupTimeRange: 'month', whereClause: `actor_id IN (SELECT id FROM ${exportUserTableName})`,
});
// Fetching every month in a single query is too memory-consuming, so run two batches (up to 2019-12, then from 2020-01) and merge the results in memory
const globalUserMonthOpenrankDataArray = [
await getUserCommunityOpenrank({
startYear, startMonth, endYear: 2019, endMonth: 12,
order: 'DESC', limit, precision: 2, limitOption: 'each',
groupTimeRange: 'month', options: { withoutDetail: true, withBot: false }
}), await getUserCommunityOpenrank({
startYear: 2020, startMonth: 1, endYear, endMonth,
order: 'DESC', limit, precision: 2, limitOption: 'each',
groupTimeRange: 'month', options: { withoutDetail: true, withBot: false }
})
];
const globalUserMonthOpenrankData = globalUserMonthOpenrankDataArray[0];
const firstBatchMonthes = globalUserMonthOpenrankDataArray[0][0].openrank.length;
const secondBatchMonthes = globalUserMonthOpenrankDataArray[1][0].openrank.length;
for (const row of globalUserMonthOpenrankData) {
const { platform, name } = row;
const secondRow = globalUserMonthOpenrankDataArray[1].find(i => i.name === name && i.platform === platform);
if (secondRow) {
// second batch has the user
row.openrank = row.openrank.concat(secondRow.openrank);
} else {
// the user was not found in the second batch, so pad the second-batch months with zeros
row.openrank = row.openrank.concat(Array.from({ length: secondBatchMonthes }, () => 0));
}
}
for (const row of globalUserMonthOpenrankDataArray[1]) {
const { platform, name, openrank } = row;
const firstRow = globalUserMonthOpenrankDataArray[0].find(i => i.name === name && i.platform === platform);
if (!firstRow) {
// the user was not found in the first batch, so pad the first-batch months with zeros
globalUserMonthOpenrankData.push({
platform, name, openrank: Array.from({ length: firstBatchMonthes }, () => 0).concat(openrank)
});
}
}
logger.info(`Get global user month openrank data done, count=${globalUserMonthOpenrankData.length}`);
const globalUserMonthOpenrankMap = rankData(globalUserMonthOpenrankData!, allMonthes, (item, _, index) => item.openrank[index], item => { return { name: item.name, id: item.id }; });
let globalUserMonthOpenrankMap = rankData(globalUserMonthOpenrankData!, allMonthes, (item, _, index) => item.openrank[index], item => { return { name: item.name, id: item.id }; });
for (const k of globalUserMonthOpenrankMap.keys()) {
globalUserMonthOpenrankMap.set(k, globalUserMonthOpenrankMap.get(k)!.filter(i => i.value > 0));
}
writeData(globalUserMonthOpenrankMap, 'Actor_Global_Month', 'open_rank/actor/global');

// by year
const globalUserYearOpenrankData = await getUserOpenrank({
const globalUserYearOpenrankData = await getUserCommunityOpenrank({
startYear, startMonth, endYear, endMonth,
order: 'DESC', limit, precision: 2, limitOption: 'each',
groupTimeRange: 'year', whereClause: `actor_id IN (SELECT id FROM ${exportUserTableName})`,
groupTimeRange: 'year', options: { withoutDetail: true, withBot: false }
});
logger.info(`Get global user year openrank data done, count=${globalUserYearOpenrankData.length}`);
const globalUserYearOpenrankMap = rankData(globalUserYearOpenrankData!, allYears, (item, _, index) => item.openrank[index], item => { return { name: item.name, id: item.id }; });
Expand Down
15 changes: 9 additions & 6 deletions src/metrics/indices.ts
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,9 @@ ${getOutterOrderAndLimit({ ...config, order: undefined }, 'openrank')}
};

/**
 * Extra options read by `getUserCommunityOpenrank` from `config.options`.
 *
 * Every field is optional: the query code checks each one against
 * `undefined`/`false` explicitly, and callers pass only the fields they need.
 */
interface UserCommunityOpenRankConfig {
  /**
   * When truthy, the query selects an empty `[] as openrankDetails` column
   * instead of aggregating per-repository details, and the inner detail
   * `groupArray` and `ORDER BY openrank DESC` are left out of the SQL.
   */
  withoutDetail?: boolean;
  /**
   * Maximum number of detail entries kept via `arraySlice`. Treated as 30
   * when not provided; a value that is not greater than 0 keeps every entry.
   */
  limit?: number;
  /**
   * When exactly `false`, the `:bot` label data is loaded through
   * `getPlatformData` before the query is built.
   * NOTE(review): presumably used to exclude bot accounts — confirm against
   * the full function body, which is not fully visible here.
   */
  withBot?: boolean;
}
export const getUserCommunityOpenrank = async (config: QueryConfig<UserCommunityOpenRankConfig>) => {
config = getMergedConfig(config);
Expand All @@ -181,6 +182,7 @@ export const getUserCommunityOpenrank = async (config: QueryConfig<UserCommunity
const timeRangeClause = getTimeRangeWhereClause(config);
if (timeRangeClause) whereClause.push(timeRangeClause);
const limit = (config.options?.limit == undefined) ? 30 : config.options.limit;
const withoutDetail = config.options?.withoutDetail;
if (config.options?.withBot === false) {
const botLabelData = getPlatformData([':bot']);
for (const b of botLabelData) {
Expand All @@ -195,14 +197,14 @@ SELECT
${getTopLevelPlatform(config)},
argMax(name, time) AS name,
${getGroupArrayInsertAtClause(config, { value: 'openrankValue', key: 'openrank' })},
${getGroupArrayInsertAtClause(config, { key: 'openrankDetails', noPrecision: true, defaultValue: '[]' })}
${withoutDetail ? '[] as openrankDetails' : getGroupArrayInsertAtClause(config, { key: 'openrankDetails', noPrecision: true, defaultValue: '[]' })}
FROM
(
SELECT
id, argMax(name, time) AS name, platform, time,
${limit > 0 ?
${withoutDetail ? '' : (limit > 0 ?
`arraySlice(groupArray((platform, repo_id, repo_name, openrank)), 1, ${limit}) AS openrankDetails` :
`groupArray((platform, repo_id, repo_name, openrank)) AS openrankDetails`},
`groupArray((platform, repo_id, repo_name, openrank)) AS openrankDetails`) + ','}
SUM(openrank) AS openrankValue
FROM
(
Expand Down Expand Up @@ -241,10 +243,11 @@ FROM
platform, repo_id, actor_id, time
) data
GROUP BY id, repo_id, platform, time
ORDER BY openrank DESC
${withoutDetail ? '' : 'ORDER BY openrank DESC'}
)
GROUP BY id, platform, time
${userWhereClause ? `HAVING ${userWhereClause}` : ''}
${getInnerOrderAndLimit(config, 'openrankValue')}
)
GROUP BY id, platform
${getOutterOrderAndLimit(config, 'openrank')}
Expand Down

0 comments on commit 8bd50cc

Please sign in to comment.