Codebase list golang-github-beorn7-perks / ce16917a-9358-4e58-af81-caec1d3b7402/main topk / topk.go
ce16917a-9358-4e58-af81-caec1d3b7402/main

Tree @ce16917a-9358-4e58-af81-caec1d3b7402/main (Download .tar.gz)

topk.go @ce16917a-9358-4e58-af81-caec1d3b7402/mainraw · history · blame

package topk

import (
	"sort"
)

// http://www.cs.ucsb.edu/research/tech_reports/reports/2005-23.pdf

type Element struct {
	Value string
	Count int
}

type Samples []*Element

func (sm Samples) Len() int {
	return len(sm)
}

func (sm Samples) Less(i, j int) bool {
	return sm[i].Count < sm[j].Count
}

func (sm Samples) Swap(i, j int) {
	sm[i], sm[j] = sm[j], sm[i]
}

type Stream struct {
	k   int
	mon map[string]*Element

	// the minimum Element
	min *Element
}

func New(k int) *Stream {
	s := new(Stream)
	s.k = k
	s.mon = make(map[string]*Element)
	s.min = &Element{}

	// Track k+1 so that less frequenet items contended for that spot,
	// resulting in k being more accurate.
	return s
}

func (s *Stream) Insert(x string) {
	s.insert(&Element{x, 1})
}

func (s *Stream) Merge(sm Samples) {
	for _, e := range sm {
		s.insert(e)
	}
}

func (s *Stream) insert(in *Element) {
	e := s.mon[in.Value]
	if e != nil {
		e.Count++
	} else {
		if len(s.mon) < s.k+1 {
			e = &Element{in.Value, in.Count}
			s.mon[in.Value] = e
		} else {
			e = s.min
			delete(s.mon, e.Value)
			e.Value = in.Value
			e.Count += in.Count
			s.min = e
		}
	}
	if e.Count < s.min.Count {
		s.min = e
	}
}

func (s *Stream) Query() Samples {
	var sm Samples
	for _, e := range s.mon {
		sm = append(sm, e)
	}
	sort.Sort(sort.Reverse(sm))

	if len(sm) < s.k {
		return sm
	}

	return sm[:s.k]
}