forked from breck7/pldb
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cli.js
executable file
·118 lines (101 loc) · 4.24 KB
/
cli.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#! /usr/bin/env node
const path = require("path")
const { TreeNode } = require("jtree/products/TreeNode.js")
const { Utils } = require("jtree/products/Utils.js")
const { Disk } = require("jtree/products/Disk.node.js")
const { ScrollSetCLI } = require("./ScrollSet.js")
// Directory that contains this script; every other path in this file is built relative to it.
const baseFolder = path.join(__dirname)
// "ignore" subfolder of baseFolder; crawlGitsCommand places its git checkouts beneath it.
const ignoreFolder = path.join(baseFolder, "ignore")
/**
 * Command-line commands and derived data sets for the "pldb" ScrollSet.
 *
 * Extends ScrollSetCLI (from ./ScrollSet.js, not shown in this file). Members used below but not
 * defined here — `concepts`, `topLanguages`, `keywordsTable`, `quickCache`, `getTree`, `save` —
 * are presumably inherited from it; confirm against ScrollSet.js.
 */
class PLDBCli extends ScrollSetCLI {
  // Configuration fields; presumably consumed by ScrollSetCLI — TODO confirm.
  conceptsFolder = path.join(baseFolder, "concepts")
  grammarFile = "code/measures.scroll"
  scrollSetName = "pldb"
  compiledConcepts = "./pldb.json"

  /**
   * The `keywordsOneHot` table rendered through `new TreeNode(rows).asCsv`.
   * The result is memoized on `this.quickCache`.
   *
   * @returns {string} presumably CSV text — depends on TreeNode#asCsv.
   */
  get keywordsOneHotCsv() {
    if (!this.quickCache.keywordsOneHotCsv) this.quickCache.keywordsOneHotCsv = new TreeNode(this.keywordsOneHot).asCsv
    return this.quickCache.keywordsOneHotCsv
  }

  /**
   * Collects the naming fields of a concept.
   *
   * @param {object} concept - object exposing name, id, standsFor, githubLanguage, wikipediaTitle and aka.
   * @returns {Array} those six values in that order, with falsy entries removed.
   */
  makeNames(concept) {
    return [
      concept.name,
      concept.id,
      concept.standsFor,
      concept.githubLanguage,
      concept.wikipediaTitle,
      concept.aka
    ].filter(value => value)
  }

  /**
   * One-hot keyword table for the top languages that have a "keywords" entry.
   *
   * The first row is the header: "id" followed by every `keyword` of `keywordsTable.rows`.
   * Each following row is a language id followed by 1 or 0 per header keyword.
   * The result is memoized on `this.quickCache`.
   *
   * @returns {Array<Array>} header row plus one row per language.
   */
  get keywordsOneHot() {
    if (this.quickCache.keywordsOneHot) return this.quickCache.keywordsOneHot
    const { keywordsTable } = this
    const allKeywords = keywordsTable.rows.map(row => row.keyword)
    const langsWithKeywords = this.topLanguages.filter(file => file.has("keywords"))
    const headerRow = ["id", ...allKeywords]
    const rows = langsWithKeywords.map(file => {
      // file.keywords is handed to Set, so it must be iterable; presumably an array of strings — confirm.
      const keywords = new Set(file.keywords)
      return [file.id, ...allKeywords.map(keyword => (keywords.has(keyword) ? 1 : 0))]
    })
    const table = [headerRow, ...rows]
    this.quickCache.keywordsOneHot = table
    return table
  }

  /**
   * Runs the GitHub importer: fetchAllRepoDataCommand() followed by writeAllRepoDataCommand().
   *
   * @returns {Promise<void>}
   */
  async crawlGitHubCommand() {
    // Todo: figure out the best repo organization for crawlers.
    // Note: this currently assumes the measurementscrawlers project is installed separately, as a sibling folder.
    const { GitHubImporter } = require("../measurementscrawlers/github.com/GitHub.js")
    const importer = new GitHubImporter(this.concepts, this.conceptsFolder)
    await importer.fetchAllRepoDataCommand()
    await importer.writeAllRepoDataCommand()
  }

  /**
   * Runs the Reddit importer's createFromAnnouncementsCommand().
   *
   * @returns {Promise<void>}
   */
  async crawlRedditPLCommand() {
    // Todo: figure out the best repo organization for crawlers.
    // Note: this currently assumes the measurementscrawlers project is installed separately, as a sibling folder.
    const { RedditImporter } = require("../measurementscrawlers/reddit.com/Reddit.js")
    const importer = new RedditImporter(this.concepts, this.conceptsFolder)
    await importer.createFromAnnouncementsCommand()
  }

  /**
   * Gathers git statistics for concepts that have a `mainRepo` and stores them on the concept.
   *
   * A concept is skipped when it has no `mainRepo`, already has `repoStats_files`, or `isFinished` is set.
   * For the others, the repo is cloned if its target folder does not exist yet, `gitStats.summary` is
   * written under "repoStats", "appeared" is filled from the first commit when absent, and the concept is saved.
   * A failure while processing one concept is logged and does not stop the remaining ones.
   *
   * @param {string|string[]} [lang] - when truthy, only concepts for which `lang.includes(file.id)` holds
   *   are processed. NOTE(review): for a string this is a substring test, so "javascript" also selects
   *   the id "java" — confirm whether that is intended.
   * @returns {Promise<void>}
   */
  async crawlGitsCommand(lang) {
    const { GitStats } = require("./code/gitStats.js")
    const gitsFolder = path.join(ignoreFolder, "node_modules", "gits") // toss in a fake "node_modules" folder to avoid a "scroll list" scan. hacky i know.
    // The callback is deliberately not async: nothing in it is awaited, and an async callback would turn
    // any throw outside the try block into an unhandled rejection instead of rejecting this method's promise.
    this.concepts.forEach(file => {
      if (lang && !lang.includes(file.id)) return
      const { mainRepo } = file
      if (!mainRepo) return
      if (file.repoStats_files) return
      if (file.isFinished) return
      try {
        const targetFolder = path.join(gitsFolder, file.id)
        const gitStats = new GitStats(mainRepo, targetFolder)
        // clone() is called without await, as before; presumably synchronous — confirm in gitStats.js.
        if (!Disk.exists(targetFolder)) gitStats.clone()
        const tree = this.getTree(file)
        tree.touchNode("repoStats").setProperties(gitStats.summary)
        if (!tree.has("appeared")) tree.set("appeared", gitStats.firstCommit.toString())
        this.save(file, tree)
      } catch (err) {
        console.error(err, file.id)
      }
    })
  }

  /**
   * Looks up the concept registered for the first recognized extension.
   *
   * @param {string[]} [extensions=[]] - candidate extensions, checked in order.
   * @returns {*} the concept id mapped to the first extension found in `extensionsMap`, or undefined when none match.
   */
  searchForConceptByFileExtensions(extensions = []) {
    const { extensionsMap } = this
    const hit = extensions.find(ext => extensionsMap.has(ext))
    return extensionsMap.get(hit)
  }

  /**
   * Map from file extension to concept id, built from each concept's space-separated `extensions` string.
   *
   * When several concepts list the same extension, the one later in `this.concepts` wins.
   * Concepts without extensions contribute nothing, and empty tokens are never registered.
   * The result is memoized on `this.quickCache`.
   *
   * @returns {Map} extension -> concept id.
   */
  get extensionsMap() {
    if (this.quickCache.extensionsMap) return this.quickCache.extensionsMap
    const extensionsMap = new Map()
    this.concepts.forEach(concept => {
      const extensions = concept.extensions || ""
      extensions
        .split(" ")
        .filter(ext => ext) // "".split(" ") yields [""]; never register the empty string as an extension
        .forEach(ext => extensionsMap.set(ext, concept.id))
    })
    // Cache only after the map is complete so a failure above cannot leave a partial map behind.
    this.quickCache.extensionsMap = extensionsMap
    return extensionsMap
  }
}
module.exports = { PLDBCli }
// Run as a CLI only when this file is the process entry point. `require.main === module` is the
// documented replacement for the deprecated `module.parent`, which is also undefined when the file is
// loaded through an ES-module import and would therefore start the CLI on import.
// process.argv[2] and process.argv[3] are forwarded to Utils.runCommand, presumably as the command name and its argument.
if (require.main === module) Utils.runCommand(new PLDBCli(), process.argv[2], process.argv[3])