Skip to content
This repository has been archived by the owner on Aug 13, 2019. It is now read-only.

postings compression exploration #629

Draft
wants to merge 18 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 57 additions & 3 deletions encoding/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,16 @@ var (

// Encbuf is a helper type to populate a byte slice with various types.
//
// Besides whole-byte appends it supports bit-granular writes (see putBit,
// putByte and PutBits), which is why it tracks how much room is left in the
// final byte of B.
type Encbuf struct {
	// B is the encoded output accumulated so far.
	B []byte
	// C is a fixed-size scratch array large enough for the longest varint.
	// NOTE(review): its users are outside this excerpt.
	C [binary.MaxVarintLen64]byte
	// Count is the number of not-yet-written low-order bits in the last
	// byte of B. Zero means the next bit write must first append a new byte.
	Count uint8
}

// Reset empties the buffer while keeping its backing storage for reuse.
// It also clears the bit cursor so a following bit write starts a new byte.
func (e *Encbuf) Reset() {
	e.B = e.B[:0]
	e.Count = 0
}

// Get returns the bytes written so far. The slice aliases the internal buffer.
func (e *Encbuf) Get() []byte { return e.B }

// Len returns the number of bytes written so far.
func (e *Encbuf) Len() int { return len(e.B) }

Expand Down Expand Up @@ -82,6 +87,55 @@ func (e *Encbuf) PutHash(h hash.Hash) {
e.B = h.Sum(e.B)
}

// bit is a single binary digit; true stands for a set (1) bit.
type bit bool

// putBit appends one bit to the stream. Bits fill each byte from the most
// significant position downwards; e.Count tracks how many positions are
// still free in the last byte of e.B.
func (e *Encbuf) putBit(bit bit) {
	// The last byte is full (or there is none yet): open a fresh zeroed byte.
	if e.Count == 0 {
		e.B = append(e.B, 0)
		e.Count = 8
	}

	// Claim the highest free position; after the decrement e.Count is
	// exactly that position's index.
	e.Count--
	if bit {
		e.B[len(e.B)-1] |= 1 << e.Count
	}
}

// putByte appends the eight bits of byt to the stream at the current bit
// position. The leading bits complete the last byte of e.B and the rest
// start a newly appended byte; e.Count is left unchanged, since exactly one
// byte's worth of bits was written.
//
// When the stream is byte-aligned on entry (e.Count == 0), byt lands whole
// in a new byte and an additional empty byte is appended after it, leaving
// e.Count at 8.
func (e *Encbuf) putByte(byt byte) {
	if e.Count == 0 {
		e.B = append(e.B, 0)
		e.Count = 8
	}

	free := e.Count
	last := len(e.B) - 1

	// Top `free` bits of byt fill what is left of the current byte.
	e.B[last] |= byt >> (8 - free)

	// The remaining low bits become the high bits of the next byte.
	e.B = append(e.B, byt<<free)
}

// PutBits appends the nbits least significant bits of u to the stream,
// most significant of those bits first. Whole groups of eight bits go
// through putByte; any remainder is written bit by bit with putBit.
//
// nbits is not validated; it is meant to lie in [0, 64].
func (e *Encbuf) PutBits(u uint64, nbits int) {
	// Left-align the payload so the next bit to emit is always bit 63.
	u <<= (64 - uint(nbits))

	for ; nbits >= 8; nbits -= 8 {
		e.putByte(byte(u >> 56))
		u <<= 8
	}

	for ; nbits > 0; nbits-- {
		e.putBit(u>>63 == 1)
		u <<= 1
	}
}

// Decbuf provides safe methods to extract data from a byte slice. It does all
// necessary bounds checking and advancing of the byte slice.
// Several datums can be extracted without checking for errors. However, before using
Expand Down
37 changes: 33 additions & 4 deletions index/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"io"
"io/ioutil"
"math"
"math/bits"
"os"
"path/filepath"
"sort"
Expand Down Expand Up @@ -522,9 +523,24 @@ func (w *Writer) WritePostings(name, value string, it Postings) error {
w.buf2.Reset()
w.buf2.PutBE32int(len(refs))

for _, r := range refs {
w.buf2.PutBE32(r)
switch postingsType {
case 1:
for _, r := range refs {
w.buf2.PutBE32(r)
}
case 2:
// The base.
w.buf2.PutUvarint32(refs[0])
// The width.
width := bits.Len32(uint32(refs[len(refs)-1]-refs[0]))
w.buf2.PutByte(byte(width))
for _, r := range refs {
w.buf2.PutBits(uint64(r-refs[0]), width)
}
case 3:
writeDeltaBlockPostings(&w.buf2, refs)
}

w.uint32s = refs

w.buf1.Reset()
Expand Down Expand Up @@ -1028,8 +1044,21 @@ type Decoder struct {
// Postings decodes a postings list from b.
//
// The payload starts with the number of entries, read with Be32int. The
// remaining bytes are interpreted according to the package-level
// postingsType (declared outside this excerpt):
//
//	1: a plain sequence of big-endian 32-bit references;
//	2: a uvarint base, a one-byte bit width, then bit-packed offsets from base;
//	3: the delta-block encoding.
//
// Any other postingsType yields an empty postings list. The returned int is
// the entry count and the error is whatever the decode buffer accumulated.
func (dec *Decoder) Postings(b []byte) (int, Postings, error) {
	d := encoding.Decbuf{B: b}
	n := d.Be32int()

	// An earlier unconditional big-endian return sat here and made the
	// switch below unreachable; dispatch happens solely on postingsType.
	switch postingsType {
	case 1:
		l := d.Get()
		return n, newBigEndianPostings(l), d.Err()
	case 2:
		// Header fields must be consumed before taking the remaining bytes.
		base := uint32(d.Uvarint())
		width := int(d.Byte())
		l := d.Get()
		return n, newBaseDeltaPostings(l, base, width, n), d.Err()
	case 3:
		l := d.Get()
		return n, newDeltaBlockPostings(l, n), d.Err()
	default:
		return n, EmptyPostings(), d.Err()
	}
}

// Series decodes a series entry from the given byte slice into lset and chks.
Expand Down
Loading