Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
swkim101 committed Aug 6, 2024
0 parents commit bcb1c1b
Showing 1,433 changed files with 21,080 additions and 0 deletions.
29 changes: 29 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# flyctl launch added from .gitignore
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
node_modules
.pnp
**/.pnp.js

# testing
coverage

# production
build

# misc
**/.DS_Store
**/.env.local
**/.env.development.local
**/.env.test.local
**/.env.production.local

**/npm-debug.log*
**/yarn-debug.log*
**/yarn-error.log*

**/db.cspapers.org
fly.toml

**/rawdata
16 changes: 16 additions & 0 deletions .github/workflows/fly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name: Fly Deploy
on:
push:
branches:
- main
jobs:
deploy:
name: Deploy app
runs-on: ubuntu-latest
concurrency: deploy-group # optional: ensure only one action runs at a time
steps:
- uses: actions/checkout@v4
- uses: superfly/flyctl-actions/setup-flyctl@master
- run: flyctl deploy --remote-only
env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
26 changes: 26 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js

# testing
/coverage

# production
/build

# misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local

npm-debug.log*
yarn-debug.log*
yarn-error.log*

db.cspapers.org/
rawdata/
13 changes: 13 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
FROM golang:1.22 AS build
WORKDIR /src
COPY . .
RUN make index
RUN make server

FROM alpine:latest
WORKDIR /
COPY --from=build /src/server /server
COPY --from=build /src/db.cspapers.org /db.cspapers.org
COPY ./conf.json /conf.json
CMD ["/server", "-config", "/conf.json"]
EXPOSE 8000
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
index:
go run ./api.cspapers.org/index

server:
GOOS=linux CGO_ENABLED=0 go build -ldflags="-s -w" -o server ./api.cspapers.org/server/
62 changes: 62 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# cspapers.org

An index for computer science papers.

## How to add papers

If you would like to add some papers, please add them in `data/` and open a PR. Once it is merged, CI/CD will reindex and redeploy automatically.

## How to run locally

Install dependencies
```bash
npm install
```

Build and run

```bash
# generate index db
go run ./api.cspapers.org/index -debug
# run server
go run ./api.cspapers.org/server -debug
# run web
npm run start
```

or

```
docker build -t server .
docker run -it -p 8000:8000 server
npm run start
```

try: http://localhost:8000/?query=fuzzing&yearFrom=2014&yearTo=2024&skip=0&take=20

## Why not Google scholar

Poor conference filter:

![google scholar](image.png)

¯\\_(ツ)_

## Todo

* Pagination
* Add more papers

PRs are welcome.

## Acknowledgement

Referred to csrankings.org for organizing conferences.

## Disclaimer

cspapers.org is served exactly as implemented in this repository. I do not modify the source code or data during or after deployment.
As shown in the source code, cspapers.org does not collect user data such as search history or IP addresses. However, Cloudflare and fly.io do collect such data for pricing and security purposes.
I personally own stock (less than \$10k) in Cloudflare and I hope they will be profitable so I can be rich. This is why I bought the domain name from Cloudflare - that will add ~\$10 per year to their net sales.
cspapers.org uses fly.io because it is the cheapest server. It costs ~\$2 per month with 1 core and 256MB RAM. I am always willing to migrate to a cheaper one. Suggestions are welcome.
The titles and abstracts of the papers originate from each conference's site.
42 changes: 42 additions & 0 deletions api.cspapers.org/config/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package config

import (
"bytes"
"encoding/json"
"fmt"
"os"
)

// Config holds the settings decoded from the JSON configuration file.
type Config struct {
// ConnectionString is read from the "connection_string" JSON key.
// NOTE(review): for the bleve backend this looks like the on-disk index
// path — confirm against the callers that pass it to the backend.
ConnectionString string `json:"connection_string"`
// Type is read from the "type" JSON key; db.Connect uses it as the key
// into dbimpl.Types to select the database backend.
Type string `json:"type"`
}

/* Reference: https://github.com/google/syzkaller/blob/master/pkg/mgrconfig/config.go */

// DefaultValues returns a Config pre-populated with the built-in defaults:
// the "bleve" backend with its index stored at "db.cspapers.org".
//
// The path matches the directory name used by the Dockerfile, .gitignore and
// .dockerignore; the previous default ("db.cspaper.org") was missing the "s"
// and therefore pointed at a different, non-ignored directory.
func DefaultValues() *Config {
	return &Config{
		ConnectionString: "db.cspapers.org",
		Type:             "bleve",
	}
}

// LoadFile reads the file at filename and decodes its JSON content into cfg.
//
// It returns an error when filename is empty, when the file cannot be read,
// or when the content cannot be decoded.
func LoadFile(filename string, cfg interface{}) error {
	if len(filename) == 0 {
		return fmt.Errorf("no config file specified")
	}

	raw, readErr := os.ReadFile(filename)
	if readErr != nil {
		return fmt.Errorf("failed to read config file: %w", readErr)
	}

	return loadData(raw, cfg)
}

// loadData decodes the JSON document in data into cfg. Unknown fields are
// tolerated (the strict check below is intentionally left disabled). A
// decoding failure is wrapped and returned.
func loadData(data []byte, cfg interface{}) error {
	decoder := json.NewDecoder(bytes.NewReader(data))
	// decoder.DisallowUnknownFields()
	decodeErr := decoder.Decode(cfg)
	if decodeErr == nil {
		return nil
	}
	return fmt.Errorf("failed to parse config file: %w", decodeErr)
}
30 changes: 30 additions & 0 deletions api.cspapers.org/config/config_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package config_test

import (
"encoding/json"
"path/filepath"
"testing"

. "github.com/swkim101/cspapers.org/api.cspapers.org/config"
)

// TestConfig loads every JSON fixture under testdata/ with LoadFile and
// checks that the resulting Config can be marshalled back to JSON. The
// marshalled form is logged for inspection.
func TestConfig(t *testing.T) {
	files, err := filepath.Glob(filepath.Join("testdata", "*.json"))
	if err != nil {
		t.Fatalf("failed to read input files: %v", err)
	}
	// Reported separately from the glob error: with no fixtures err is nil,
	// and a combined message would read "failed to read input files: <nil>".
	if len(files) == 0 {
		t.Fatal("no input files found in testdata")
	}
	for _, file := range files {
		t.Run(file, func(t *testing.T) {
			cfg := &Config{}
			if err := LoadFile(file, cfg); err != nil {
				t.Fatal(err)
			}
			blob, err := json.Marshal(cfg)
			if err != nil {
				t.Fatal(err)
			}
			t.Log(string(blob))
		})
	}
}
4 changes: 4 additions & 0 deletions api.cspapers.org/config/testdata/config_test.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"connection_string": "aa",
"type": "bleve"
}
133 changes: 133 additions & 0 deletions api.cspapers.org/db/bleve/bleve.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
package bleve

import (
	"errors"
	"fmt"
	"os"
	"regexp"
	"strings"

	"github.com/blevesearch/bleve/v2"
	"github.com/blevesearch/bleve/v2/search/query"
	"github.com/swkim101/cspapers.org/api.cspapers.org/db/dbimpl"
	"github.com/swkim101/cspapers.org/api.cspapers.org/log"
	"github.com/swkim101/cspapers.org/api.cspapers.org/types"
)

// init registers this backend with dbimpl under the name "bleve", passing
// ctor, search and insert to dbimpl.Register. It runs when the package is
// imported.
func init() {
dbimpl.Register("bleve", ctor, search, insert)
}

// Package-level state shared by ctor, search and insert.
var (
// index is the bleve index handle; ctor assigns it, search and insert use it.
index bleve.Index
// flagDebug records the debug flag passed to ctor.
flagDebug bool
)

// ctor opens the bleve index at connStr, creating it with the default index
// mapping when the path does not exist yet, and stores the handle in the
// package-level index variable. The debug flag is remembered in flagDebug;
// when it is set, the index statistics are logged after a successful open.
//
// An error from os.Stat, bleve.Open or bleve.New is returned (wrapped with
// the path) before the index is used. Previously such a failure left index
// nil and the debug branch dereferenced it.
func ctor(connStr string, debug bool) (err error) {
	flagDebug = debug

	_, statErr := os.Stat(connStr)
	switch {
	case statErr == nil:
		/* if exist */
		index, err = bleve.Open(connStr)
	case errors.Is(statErr, os.ErrNotExist):
		/* if not exist */
		index, err = bleve.New(connStr, bleve.NewIndexMapping())
	default:
		// Any other stat failure (e.g. permissions) is reported as-is.
		err = statErr
	}
	if err != nil {
		return fmt.Errorf("opening bleve index %q: %w", connStr, err)
	}

	if flagDebug {
		// Use a separate error variable so the function result is not shadowed.
		stats, statsErr := index.Stats().MarshalJSON()
		if statsErr != nil {
			log.Fatalf("%v", statsErr)
		} else {
			log.Debugf("%v", string(stats))
		}
	}
	return nil
}

// search executes req against the package-level bleve index and converts the
// hits into a types.SearchResponse.
//
// The query is the conjunction of three filters:
//   - year: req.YearFrom <= year <= req.YearTo
//   - venue: any of the non-empty entries of req.Venue (lower-cased)
//   - keyword: a fuzzy query on the trimmed, lower-cased req.Query, plus a
//     "contains" regexp query when the text has no spaces
//
// req.Query is trimmed in place. req.OrderBy must be "score" or "date"; any
// other value, or a failing index search, is logged and yields an empty
// response. Hits whose ID cannot be decomposed are logged and skipped.
func search(req *types.SearchRequest) *types.SearchResponse {
	/* 2014 - 2024 */
	yearFrom := bleve.NewQueryStringQuery(fmt.Sprintf("year:>=%v", req.YearFrom))
	yearTo := bleve.NewQueryStringQuery(fmt.Sprintf("year:<=%v", req.YearTo))
	yearFilter := bleve.NewConjunctionQuery(yearFrom, yearTo)

	/* AND */
	/* NDSS or AAAI or ... */
	// NOTE(review): v is still spliced into query-string syntax; a venue
	// containing spaces or query operators will not behave as a plain term.
	// Confirm callers only pass known venue names.
	venueClauses := []query.Query{}
	for _, v := range req.Venue {
		if v == "" {
			continue
		}
		qs := fmt.Sprintf("venue:%v", strings.ToLower(v))
		venueClauses = append(venueClauses, bleve.NewQueryStringQuery(qs))
	}
	// NOTE(review): when no venue is supplied this disjunction has no
	// clauses; confirm how bleve evaluates an empty disjunction inside the
	// conjunction below (it may match nothing).
	venueFilter := bleve.NewDisjunctionQuery(venueClauses...)

	/* AND */
	/* title like <query> or abstract like <query> */
	req.Query = strings.TrimSpace(req.Query)
	needle := strings.ToLower(req.Query)
	keywordClauses := []query.Query{bleve.NewFuzzyQuery(needle)}
	if isWord(req.Query) {
		// Build the regexp query directly and escape the user's text, rather
		// than splicing it into "/.*<text>.*/" query-string syntax. Input
		// such as "c++" or "i/o" is then matched literally instead of
		// producing an invalid query that fails the whole search.
		pattern := ".*" + regexp.QuoteMeta(needle) + ".*"
		keywordClauses = append(keywordClauses, bleve.NewRegexpQuery(pattern))
	}
	keywordFilter := bleve.NewDisjunctionQuery(keywordClauses...)

	// Named so that it does not shadow the imported query package.
	fullQuery := bleve.NewConjunctionQuery(yearFilter, venueFilter, keywordFilter)

	log.Debugf("%v", req.Query)
	searchReq := bleve.NewSearchRequest(fullQuery)
	sortBy := ""
	switch req.OrderBy {
	case "score":
		sortBy = "_score"
	case "date":
		sortBy = "year"
	default:
		log.Printf("unknown orderby flag %v", req.OrderBy)
		return &types.SearchResponse{}
	}
	if !req.Ascending {
		// A leading "-" requests descending order.
		sortBy = "-" + sortBy
	}
	searchReq.SortBy([]string{sortBy})

	searchResults, err := index.Search(searchReq)
	if err != nil {
		log.Printf("%v", err)
		return &types.SearchResponse{}
	}
	log.Debugf("%v", searchResults)

	// Kept as a non-nil empty slice so an empty result stays an empty list.
	data := []*types.SearchResponseUnit{}
	for _, hit := range searchResults.Hits {
		// The paper's year, venue and title are recovered from the document ID.
		paperYear, paperVenue, paperTitle, err := types.Decompose(types.Index(hit.ID))
		if err != nil {
			log.Printf("err in decomposing %v, %v", hit.ID, err)
			continue
		}
		data = append(data, &types.SearchResponseUnit{
			Index: hit.ID,
			Score: hit.Score,
			Paper: types.Paper{
				Year:  paperYear,
				Venue: paperVenue,
				Title: paperTitle,
			},
		})
	}

	// NOTE(review): Total is the number of hits returned here, which is not
	// necessarily the number of matching documents — confirm this is what
	// clients expect.
	return &types.SearchResponse{
		Total: len(data),
		Data:  data,
	}
}

// insert lower-cases the title, venue and abstract of req in place and then
// adds req to the package-level bleve index under the ID returned by
// req.ToIndex().
func insert(req *types.InsertRequest) error {
	for _, field := range []*string{&req.Title, &req.Venue, &req.Abstract} {
		*field = strings.ToLower(*field)
	}
	docID := req.ToIndex()
	return index.Index(docID, req)
}

// isWord reports whether s contains no space character (' '). The empty
// string counts as a word; other whitespace such as tabs is not considered.
func isWord(s string) bool {
	return !strings.Contains(s, " ")
}
17 changes: 17 additions & 0 deletions api.cspapers.org/db/db.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package db

import (
"fmt"

"github.com/swkim101/cspapers.org/api.cspapers.org/config"
_ "github.com/swkim101/cspapers.org/api.cspapers.org/db/bleve"
"github.com/swkim101/cspapers.org/api.cspapers.org/db/dbimpl"
)

// Connect looks up the database backend registered in dbimpl.Types under
// cfg.Type and returns it. An error is returned when no backend with that
// name is registered.
func Connect(cfg *config.Config) (*dbimpl.Type, error) {
	if backend, found := dbimpl.Types[cfg.Type]; found {
		return backend, nil
	}
	return nil, fmt.Errorf("unknown db type '%v'", cfg.Type)
}
Loading

0 comments on commit bcb1c1b

Please sign in to comment.