Top K Frequent Words
Return the k most frequent words, ties broken lexicographically — counter + heap.
6 min read
hash-map heap sorting
Problem#
Given an array of words and integer k, return the k most frequent words. Sort by frequency descending; on ties, sort alphabetically.
Examples#
words = ["i","love","leetcode","i","love","coding"], k = 2→["i","love"].words = ["the","day","is","sunny","the","the","the","sunny","is","is"], k = 4→["the","is","sunny","day"].
Constraints#
1 <= words.length <= 500,1 <= words[i].length <= 10.
Hints#
Hint
Min-heap of size k with comparator: higher freq is “smaller”, and within equal freq, larger string is “smaller”.
Approach#
Count occurrences with a hash map. Maintain a size-k min-heap whose ordering inverts the desired output so the “worst” element is at top — when the heap exceeds k, pop. Drain the heap at the end and reverse.
Solution#
class Solution {public: vector<string> topKFrequent(vector<string>& words, int k) { unordered_map<string, int> cnt; for (auto& w : words) ++cnt[w]; auto cmp = [](const pair<int,string>& a, const pair<int,string>& b) { if (a.first != b.first) return a.first > b.first; // smaller freq sits at top return a.second < b.second; // larger word sits at top }; priority_queue<pair<int,string>, vector<pair<int,string>>, decltype(cmp)> pq(cmp); for (auto& [w, c] : cnt) { pq.push({c, w}); if ((int)pq.size() > k) pq.pop(); } vector<string> ans(k); for (int i = k - 1; i >= 0; --i) { ans[i] = pq.top().second; pq.pop(); } return ans; }};import "container/heap"
type wordItem struct { freq int word string}type wordHeap []wordItem
func (h wordHeap) Len() int { return len(h) }func (h wordHeap) Less(i, j int) bool { if h[i].freq != h[j].freq { return h[i].freq < h[j].freq // smaller freq at top } return h[i].word > h[j].word // larger word at top}func (h wordHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }func (h *wordHeap) Push(x interface{}) { *h = append(*h, x.(wordItem)) }func (h *wordHeap) Pop() interface{} { o := *h; n := len(o); x := o[n-1]; *h = o[:n-1]; return x }
func topKFrequent(words []string, k int) []string { cnt := map[string]int{} for _, w := range words { cnt[w]++ } pq := &wordHeap{} for w, c := range cnt { heap.Push(pq, wordItem{c, w}) if pq.Len() > k { heap.Pop(pq) } } ans := make([]string, k) for i := k - 1; i >= 0; i-- { ans[i] = heap.Pop(pq).(wordItem).word } return ans}import heapqfrom collections import Counterfrom typing import List
class Solution: def topKFrequent(self, words: List[str], k: int) -> List[str]: cnt = Counter(words) # heap ordering: smaller freq at top; on tie, larger word at top # store (freq, neg-lex-word) won't work for strings; use a wrapper heap: List[tuple] = [] class Item: __slots__ = ('freq', 'word') def __init__(self, freq: int, word: str): self.freq = freq self.word = word def __lt__(self, other: 'Item') -> bool: if self.freq != other.freq: return self.freq < other.freq return self.word > other.word
for w, c in cnt.items(): heapq.heappush(heap, Item(c, w)) if len(heap) > k: heapq.heappop(heap) ans: List[str] = [''] * k for i in range(k - 1, -1, -1): ans[i] = heapq.heappop(heap).word return ansclass WordHeap { constructor() { this.h = []; } size() { return this.h.length; } // ordering: smaller freq at top; on tie, larger word at top cmp(a, b) { if (a[0] !== b[0]) return a[0] - b[0]; return a[1] < b[1] ? 1 : (a[1] > b[1] ? -1 : 0); } push(x) { this.h.push(x); let i = this.h.length - 1; while (i > 0) { const p = (i - 1) >> 1; if (this.cmp(this.h[p], this.h[i]) <= 0) break; [this.h[p], this.h[i]] = [this.h[i], this.h[p]]; i = p; } } pop() { const top = this.h[0]; const last = this.h.pop(); if (this.h.length) { this.h[0] = last; let i = 0; const n = this.h.length; while (true) { let l = i * 2 + 1, r = i * 2 + 2, s = i; if (l < n && this.cmp(this.h[l], this.h[s]) < 0) s = l; if (r < n && this.cmp(this.h[r], this.h[s]) < 0) s = r; if (s === i) break; [this.h[s], this.h[i]] = [this.h[i], this.h[s]]; i = s; } } return top; }}
var topKFrequent = function(words, k) { const cnt = new Map(); for (const w of words) cnt.set(w, (cnt.get(w) || 0) + 1); const pq = new WordHeap(); for (const [w, c] of cnt) { pq.push([c, w]); if (pq.size() > k) pq.pop(); } const ans = new Array(k); for (let i = k - 1; i >= 0; i--) ans[i] = pq.pop()[1]; return ans;};class Solution { public List<String> topKFrequent(String[] words, int k) { Map<String, Integer> cnt = new HashMap<>(); for (String w : words) cnt.merge(w, 1, Integer::sum); // min-heap: smaller freq at top; on tie, larger word at top PriorityQueue<Map.Entry<String, Integer>> pq = new PriorityQueue<>((a, b) -> { if (!a.getValue().equals(b.getValue())) return a.getValue() - b.getValue(); return b.getKey().compareTo(a.getKey()); }); for (Map.Entry<String, Integer> e : cnt.entrySet()) { pq.offer(e); if (pq.size() > k) pq.poll(); } String[] ans = new String[k]; for (int i = k - 1; i >= 0; i--) ans[i] = pq.poll().getKey(); return Arrays.asList(ans); }}class WordHeap { h: [number, string][] = []; size(): number { return this.h.length; } cmp(a: [number, string], b: [number, string]): number { if (a[0] !== b[0]) return a[0] - b[0]; return a[1] < b[1] ? 1 : (a[1] > b[1] ? -1 : 0); } push(x: [number, string]): void { this.h.push(x); let i: number = this.h.length - 1; while (i > 0) { const p: number = (i - 1) >> 1; if (this.cmp(this.h[p], this.h[i]) <= 0) break; [this.h[p], this.h[i]] = [this.h[i], this.h[p]]; i = p; } } pop(): [number, string] { const top: [number, string] = this.h[0]; const last: [number, string] = this.h.pop() as [number, string]; if (this.h.length) { this.h[0] = last; let i: number = 0; const n: number = this.h.length; while (true) { let l: number = i * 2 + 1, r: number = i * 2 + 2, s: number = i; if (l < n && this.cmp(this.h[l], this.h[s]) < 0) s = l; if (r < n && this.cmp(this.h[r], this.h[s]) < 0) s = r; if (s === i) break; [this.h[s], this.h[i]] = [this.h[i], this.h[s]]; i = s; } } return top; }}
function topKFrequent(words: string[], k: number): string[] { const cnt: Map<string, number> = new Map(); for (const w of words) cnt.set(w, (cnt.get(w) || 0) + 1); const pq: WordHeap = new WordHeap(); for (const [w, c] of cnt) { pq.push([c, w]); if (pq.size() > k) pq.pop(); } const ans: string[] = new Array(k); for (let i = k - 1; i >= 0; i--) ans[i] = pq.pop()[1]; return ans;}Editorial#
The comparator’s two-axis inversion ensures pop() always removes the candidate that loses the tie. Heap of size k keeps memory at O(k); total operations are N log k. For exact tie ordering, the lexicographic comparison on string matches the spec exactly.
Complexity#
- Time: O(N log k).
- Space: O(N) for the counter.
Concept revision#
Related#