Substring with Concatenation of All Words
Find all start indices where s contains a concatenation of every word in `words` in any order.
6 min read
sliding-window hash-map string
Problem#
words are all the same length w. Find every index i such that s.substr(i, m*w) is a concatenation of all of words in some order.
Examples#
s = "barfoothefoobarman", words = ["foo","bar"]→[0,9]
Constraints#
1 <= words.length, w <= 30,1 <= s.length <= 10^4.
Hints#
Hint 1
Group alignment offsets by
i mod w; within each offset run a word-level sliding window. Approach#
Bucket the search by start-modulo-w: in each offset, slide a window of m word-slots. Maintain frequency counts of seen words; on mismatch (extra word or unseen word) advance the window by one word.
Solution#
class Solution {public: vector<int> findSubstring(string s, vector<string>& words) { const int m = words.size(), w = words[0].size(); const int totalLen = m * w; vector<int> ans; if ((int)s.size() < totalLen) return ans; unordered_map<string,int> need; for (auto& wd : words) ++need[wd]; for (int offset = 0; offset < w; ++offset) { int left = offset, count = 0; unordered_map<string,int> have; for (int right = offset; right + w <= (int)s.size(); right += w) { string token = s.substr(right, w); if (!need.count(token)) { have.clear(); count = 0; left = right + w; continue; } ++have[token]; ++count; while (have[token] > need[token]) { --have[s.substr(left, w)]; --count; left += w; } if (count == m) { ans.push_back(left); --have[s.substr(left, w)]; --count; left += w; } } } return ans; }};func findSubstring(s string, words []string) []int { m, w := len(words), len(words[0]) totalLen := m * w ans := []int{} if len(s) < totalLen { return ans } need := map[string]int{} for _, wd := range words { need[wd]++ } for offset := 0; offset < w; offset++ { left, count := offset, 0 have := map[string]int{} for right := offset; right+w <= len(s); right += w { token := s[right : right+w] if _, ok := need[token]; !ok { have = map[string]int{} count = 0 left = right + w continue } have[token]++ count++ for have[token] > need[token] { have[s[left:left+w]]-- count-- left += w } if count == m { ans = append(ans, left) have[s[left:left+w]]-- count-- left += w } } } return ans}from collections import Counter, defaultdictfrom typing import List
class Solution: def findSubstring(self, s: str, words: List[str]) -> List[int]: m, w = len(words), len(words[0]) total_len = m * w ans: List[int] = [] if len(s) < total_len: return ans need = Counter(words) for offset in range(w): left = offset count = 0 have: dict = defaultdict(int) right = offset while right + w <= len(s): token = s[right:right + w] if token not in need: have.clear() count = 0 left = right + w else: have[token] += 1 count += 1 while have[token] > need[token]: have[s[left:left + w]] -= 1 count -= 1 left += w if count == m: ans.append(left) have[s[left:left + w]] -= 1 count -= 1 left += w right += w return ansfunction findSubstring(s, words) { const m = words.length, w = words[0].length; const totalLen = m * w; const ans = []; if (s.length < totalLen) return ans; const need = new Map(); for (const wd of words) need.set(wd, (need.get(wd) || 0) + 1); for (let offset = 0; offset < w; offset++) { let left = offset, count = 0; const have = new Map(); for (let right = offset; right + w <= s.length; right += w) { const token = s.substring(right, right + w); if (!need.has(token)) { have.clear(); count = 0; left = right + w; continue; } have.set(token, (have.get(token) || 0) + 1); count++; while (have.get(token) > need.get(token)) { const leftTok = s.substring(left, left + w); have.set(leftTok, have.get(leftTok) - 1); count--; left += w; } if (count === m) { ans.push(left); const leftTok = s.substring(left, left + w); have.set(leftTok, have.get(leftTok) - 1); count--; left += w; } } } return ans;}class Solution { public List<Integer> findSubstring(String s, String[] words) { int m = words.length, w = words[0].length(); int totalLen = m * w; List<Integer> ans = new ArrayList<>(); if (s.length() < totalLen) return ans; Map<String, Integer> need = new HashMap<>(); for (String wd : words) need.merge(wd, 1, Integer::sum); for (int offset = 0; offset < w; offset++) { int left = offset, count = 0; Map<String, Integer> have = new HashMap<>(); for (int right = offset; right + w <= s.length(); right += w) { String token = s.substring(right, right + w); if (!need.containsKey(token)) { have.clear(); count = 0; left = right + w; continue; } have.merge(token, 1, Integer::sum); count++; while (have.get(token) > need.get(token)) { String leftTok = s.substring(left, left + w); have.merge(leftTok, -1, Integer::sum); count--; left += w; } if (count == m) { ans.add(left); String leftTok = s.substring(left, left + w); have.merge(leftTok, -1, Integer::sum); count--; left += w; } } } return ans; }}function findSubstring(s: string, words: string[]): number[] { const m: number = words.length, w: number = words[0].length; const totalLen: number = m * w; const ans: number[] = []; if (s.length < totalLen) return ans; const need: Map<string, number> = new Map(); for (const wd of words) need.set(wd, (need.get(wd) || 0) + 1); for (let offset = 0; offset < w; offset++) { let left: number = offset, count: number = 0; const have: Map<string, number> = new Map(); for (let right = offset; right + w <= s.length; right += w) { const token: string = s.substring(right, right + w); if (!need.has(token)) { have.clear(); count = 0; left = right + w; continue; } have.set(token, (have.get(token) || 0) + 1); count++; while ((have.get(token) as number) > (need.get(token) as number)) { const leftTok: string = s.substring(left, left + w); have.set(leftTok, (have.get(leftTok) as number) - 1); count--; left += w; } if (count === m) { ans.push(left); const leftTok: string = s.substring(left, left + w); have.set(leftTok, (have.get(leftTok) as number) - 1); count--; left += w; } } } return ans;}Editorial#
The word-aligned sliding window beats a naïve substring-by-substring check by a factor of w. Three loop bodies: unknown word → reset; surplus copy → shrink left; full match → record and shift. Each s.substr(_, w) call costs O(w); for tighter performance switch to integer hashes of the words.
Complexity#
- Time: O(n × w).
- Space: O(m × w).
Concept revision#
Related#