Repeated DNA Sequences
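给一个只由A、C、G、T组成的字符串, 返回所有长度为10且在字符串中出现超过一次的子串. 因为子串长度固定为10, 所以只需要依次检查每一个长度为10的窗口是否重复出现. 为了快速比较, 每个字符用2个bit编码 (A=0, C=1, G=2, T=3), 一个窗口就被编码成一个20位的整数作为hash. 查重时用两个set: 第一个set记录已经出现过的窗口, 第二个set记录已经加入结果的窗口, 这样每个重复的子串只会被返回一次.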
/**
 * Finds every 10-letter substring that occurs more than once in a DNA sequence.
 *
 * <p>Each letter is packed into 2 bits (A=0, C=1, G=2, T=3), so a 10-letter window
 * becomes a 20-bit integer key that is cheap to store and compare.
 */
public class Solution {
    /** Length of the substrings being searched for. */
    private static final int WINDOW = 10;

    /** Keeps only the low 20 bits of the rolling key: 2 bits for each of the 10 letters. */
    private static final int WINDOW_MASK = (1 << (2 * WINDOW)) - 1;

    /** 2-bit code for each uppercase letter, indexed by {@code letter - 'A'}. */
    private static final int[] CODE = new int[26];

    static {
        CODE['A' - 'A'] = 0;
        CODE['C' - 'A'] = 1;
        CODE['G' - 'A'] = 2;
        CODE['T' - 'A'] = 3;
    }

    /**
     * Returns all 10-letter substrings of {@code s} that appear more than once.
     *
     * <p>Each repeated substring is reported exactly once, in the order in which its
     * second occurrence is encountered while scanning left to right.
     *
     * <p>The input is not validated: uppercase letters other than A, C, G and T are
     * encoded as 0 (the same as 'A'), and characters outside 'A'..'Z' cause an
     * {@link ArrayIndexOutOfBoundsException}.
     *
     * @param s the DNA sequence, expected to contain only 'A', 'C', 'G' and 'T';
     *          {@code null} is treated as an empty sequence
     * @return a new list of the repeated 10-letter substrings; empty if there are none
     *         or if {@code s} is {@code null} or shorter than 10 characters
     */
    public List<String> findRepeatedDnaSequences(String s) {
        List<String> res = new ArrayList<String>();
        if (s == null || s.length() < WINDOW) {
            return res;
        }

        // Keys of windows seen at least once.
        Set<Integer> seen = new HashSet<Integer>();
        // Keys of windows already added to the result, so each repeat is reported once.
        Set<Integer> reported = new HashSet<Integer>();

        // Rolling key: shift in the newest letter and mask off the letter that just
        // left the window, instead of re-encoding all 10 letters for every position.
        int key = 0;
        for (int end = 0; end < s.length(); end++) {
            key = ((key << 2) | CODE[s.charAt(end) - 'A']) & WINDOW_MASK;

            int start = end - WINDOW + 1;
            if (start < 0) {
                // Fewer than 10 letters consumed so far; no complete window yet.
                continue;
            }

            // add() returns false when the key is already present, so this fires
            // only on the first repeat of a given window.
            if (!seen.add(key) && reported.add(key)) {
                res.add(s.substring(start, end + 1));
            }
        }
        return res;
    }
}