In this post, we will solve HackerRank Save Humanity Problem Solution.
Oh!! Mankind is in trouble again. This time, it’s a deadly disease spreading at a rate never seen before. The need of the hour is to set up efficient virus detectors. You are the lead at Central Hospital and you need to find a fast and reliable way to detect the footprints of the virus DNA in that of the patient.
The DNA of the patient as well as of the virus consists of lowercase letters. Since the collected data is raw, there may be some errors. You will need to find all substrings in the patient DNA that either exactly match the virus DNA or have at most one mismatch, i.e., a difference in at most one location.
For example, “aa
” and “aa
” are matching, “ab
” and “aa
” are matching, while “abb
” and “bab
” are not.
Function Description
Complete the virusIndices function in the editor below. It should print a list of space-separated integers that represent the starting indices of matching substrings in increasing order, or No match!
.
virusIndices has the following parameter(s):
- p: a string that represents patient DNA
- v: a string that represents virus DNA
Input Format
The first line contains an integer t, the number of test cases.
. Each of the next lines contains two space-separated strings p (the patient DNA) and v (the virus DNA).
Output Format
For each test case, output a single line containing a space-delimited list of starting indices (0 -indexed) of substrings of p which are matching with v according to the condition mentioned above. The indices have to be in increasing order. If there is no matching substring, output No Match!.
Sample Input 0
3 abbab ba hello world banana nan
Sample Output 0
1 2 No Match! 0 2
Explanation 0
For the first case, the substrings of p starting at indices 1 and 2 are “bb” and “ba” and they are matching with the string which is “ba”.
For the second case, there are no matching substrings so the output is No Match!.
For the third case, the substrings of p starting at indices 0 and 2 are “ban” and “nan” and they are matching with the string which is “nan”.
Sample Input 1
3 cgatcg gc atcgatcga cgg aardvark ab
Sample Output 1
1 3 2 6 0 1 5
Explanation 1
For the first case, the substrings of p starting at indices 1 and 3 are “ga” and “gc” and they
are matching with the string which is “gc”.
For the second case, the substrings of p starting at indices 2 and 6 are “cga” and “cga” and they are matching with the string which is “cgg”.
For the third case, the substrings of p starting at indices 0, 1 and 5 are “aa”, “ar” and “ar” and they are matching with the string which is “ab”.

Save Humanity C Solution
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <limits.h>
#include <stdbool.h>
#include <errno.h>
#include <time.h>
#define kMaxSize 100001
#define kMaxMismatch 1
typedef long long int lli;
int findDna8b(char *p, char *v, int vc);
int main()
{
// The code below sets files for stdin and stdout, and times the results for testing.
// If left in it will cause the solution to fail at HackerRank.
//char test_file_path[] = "";
//freopen(test_file_path, "r", stdin);
//char test_file_out[] = "";
//freopen(test_file_out, "w", stdout);
// Start the clock.
//static double time_consumed = 0;
//clock_t start, end;
//start = clock();
// Allocate memory for strings.
char *p = (char*)malloc(kMaxSize * sizeof(char));
char *v = (char*)malloc(kMaxSize * sizeof(char));
// Test cases.
int tc;
scanf("%d", &tc);
while (0 < tc--)
{
// Load strings.
scanf("%s %s", p, v);
int pc = (int)strlen(p);
int vc = (int)strlen(v);
// Look for v in p. Print starting index of each match.
int c = (pc-vc);
int matched = 0;
for (int i = 0; i <= c; i++){
if (findDna8b(&p[i], v, vc) == 1){
matched++;
printf("%d ", i);
}
}
// We have to indicate if no matches were found.
if (matched <= 0)
printf("No Match!\n");
else
printf("\n");
}
// Cleanup
free(p);
free(v);
// Comment out or this will fail HackerRank.
//end = clock();
//time_consumed += (double)(end - start);
//printf("%f\n", time_consumed / CLOCKS_PER_SEC);
return 0;
}
int findDna8b(char *p, char *v, int vc)
{
// Cast strings to lli pointers.
lli *p8 = (lli*)p;
lli *v8 = (lli*)v;
int mismatch = 0;
int c = vc/8;
int i;
for (i = 0; i < c; i++)
{
if (p8[i] != v8[i])
{
int jc = (i*8)+8;
for (int j = i*8; j < jc; j++)
{
if (p[j] != v[j]){
mismatch++;
if (mismatch > kMaxMismatch)
return -1;
}
}
}
}
for (int j = i*8; j < vc; j++)
{
if (p[j] != v[j]){
mismatch++;
}
}
if (mismatch > kMaxMismatch)
return -1;
else
return 1;
}
Save Humanity C++ Solution
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
vector<int> Z(string s) {
vector<int> z = {0};
int l = 0, r = 0;
for (int i = 1; i < s.size(); i++) {
if (i >= r) {
r = l = i;
while (r < s.size() && s[r-i] == s[r]) r++;
z.push_back(r-i);
} else {
if (z[i-l] < r-i) {
z.push_back(z[i-l]);
} else {
l = i;
while (r < s.size() && s[r-i] == s[r]) r++;
z.push_back(r-i);
}
}
}
return z;
}
int main() {
ios::sync_with_stdio(0); cin.tie(0);
int t;
cin >> t;
while (t--) {
string text, pat;
cin >> text >> pat;
auto a = Z(pat+"@"+text);
reverse(text.begin(),text.end());
reverse(pat.begin(),pat.end());
auto b = Z(pat+"@"+text);
vector<int> ans;
for (int i = 0; i+pat.size() <= text.size(); i++) {
if (a[pat.size()+1+i]+b[pat.size()+1+text.size()-(i+pat.size())]+1 >= pat.size())
ans.push_back(i);
}
if (ans.empty()) cout << "No Match!" << endl;
else {
for (int i : ans) cout << i << ' ';
cout << endl;
}
}
}
Save Humanity C Sharp Solution
using System.CodeDom.Compiler;
using System.Collections.Generic;
using System.Collections;
using System.ComponentModel;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Runtime.Serialization;
using System.Text.RegularExpressions;
using System.Text;
using System;
class Result
{
static int [] Zvalue(string s)
{
int[] z = new int[s.Length];
int n = s.Length;
int L = 0, R = 0;
for (int i = 1; i < n; i++)
{
if (i > R)
{
L = R = i;
while (R < n && s[R - L] == s[R]) R++;
z[i] = R - L; R--;
}
else
{
int k = i - L;
if (z[k] < R - i + 1) z[i] = z[k];
else
{
L = i;
while (R < n && s[R - L] == s[R]) R++;
z[i] = R - L; R--;
}
}
}
return z;
}
public static void virusIndices(string P, string V) {
string S1 = V + "#" + P;
string rV = new string(V.Reverse ().ToArray ());
string rP = new string(P.Reverse().ToArray());
string S2 = rV + "#" + rP;
int[] Z1 = Zvalue (S1);
int[] Z2 = Zvalue (S2);
int N = V.Length;
int M = P.Length;
int L = S1.Length;
List<int> results = new List<int> ();
for (int i = 0; i <= M - N; i++)
{
if (Z1[N+1+i] == N)
results.Add (i);
else
{
int m1 = Z1[N + 1 + i];
int m2 = Z2[L - (i + N)];
//if (i == 0)
// Console.WriteLine("{0}+{1}", m1, m2);
if (m1 + m2 + 1 >= N)
results.Add (i);
}
}
Console.WriteLine (results.Count()>0 ?string.Join (" ", results.Select (x => x.ToString ()).ToArray ()):"No Match!");
}
}
class Solution
{
public static void Main(string[] args)
{
int t = Convert.ToInt32(Console.ReadLine().Trim());
for (int tItr = 0; tItr < t; tItr++)
{
string[] firstMultipleInput = Console.ReadLine().TrimEnd().Split(' ');
string p = firstMultipleInput[0];
string v = firstMultipleInput[1];
Result.virusIndices(p, v);
}
}
}
Save Humanity Java Solution
import java.io.*;
import java.math.*;
import java.text.*;
import java.util.*;
import java.util.regex.*;
import java.util.stream.Collectors;
public class Solution {
/*
* Complete the virusIndices function below.
*/
static List<Integer> virusIndices(String p, String v) {
List<Integer> z = z_function(v + "#" + p);
List<Integer> z_reversed = z_function(new StringBuilder(v).reverse().toString() + "#" + new StringBuilder(p).reverse().toString());
List<Integer> answer = new ArrayList<>();
for (int i = 0; i <= p.length() - v.length(); ++i) {
if (z.get(i + v.length() + 1) + z_reversed.get(p.length() - i + 1) >= v.length() - 1) {
answer.add(i);
}
}
return answer;
}
private static List<Integer> z_function(String s) {
List<Integer> z = new ArrayList(s.length());
z.add(0);
int l = 0;
int r = 0;
for (int i = 1; i < s.length(); ++i) {
int z_i = 0;
if (i <= r)
z_i = r - i + 1 < z.get(i - l) ? r - i + 1 : z.get(i - l);
while (i + z_i < s.length() && s.charAt(z_i) == s.charAt(i + z_i))
++z_i;
if (i + z_i - 1 > r) {
l = i;
r = i + z_i - 1;
}
z.add(z_i);
}
return z;
}
private static final Scanner scanner = new Scanner(System.in);
public static void main(String[] args) {
int t = Integer.parseInt(scanner.nextLine().trim());
for (int tItr = 0; tItr < t; tItr++) {
String[] pv = scanner.nextLine().split(" ");
String p = pv[0];
String v = pv[1];
List<Integer> answer = virusIndices(p, v);
if (answer.size() == 0) {
System.out.println("No Match!");
} else {
System.out.println(answer.stream().map(String::valueOf).collect(Collectors.joining(" ")));
}
}
}
}
Save Humanity JavaScript Solution
'use strict';
process.stdin.resume();
process.stdin.setEncoding('utf-8');
let inputString = '';
let currentLine = 0;
process.stdin.on('data', inputStdin => {
inputString += inputStdin;
});
process.stdin.on('end', _ => {
inputString = inputString.trim().split('\n').map(str => str.trim());
main();
});
function readLine() {
return inputString[currentLine++];
}
/*
* Complete the virusIndices function below.
*/
function stringDiffThreshold(s1, s2, max = 0, score = 0) {
var len = s1.length;
if (s1 !== s2 && len > 1) {
var len2 = Math.ceil(len / 2);
score = stringDiffThreshold(s1.substr(0, len2), s2.substr(0, len2), max, score);
if (score > max) return score;
score = stringDiffThreshold(s1.substr(len2), s2.substr(len2), max, score);
if (score > max) return score;
} else if (s1 !== s2 && len === 1) {
//console.log(len + '/:' + s1 + '!==' + s2);
return score + 1;
} else {
//console.log(len + '/:' + s1 + '===' + s2);
}
return score;
}
function virusIndices(p, v) {
var out = [];
for (var i = 0; i < p.length - v.length+1; i++) {
if (stringDiffThreshold(p.substr(i, v.length), v, 1) < 2)
out.push(i);
}
if (out.length > 0)
console.log(out.join(' '));
else
console.log('No Match!');
}
function main() {
const t = parseInt(readLine(), 10);
for (let tItr = 0; tItr < t; tItr++) {
const pv = readLine().split(' ');
const p = pv[0];
const v = pv[1];
virusIndices(p, v);
}
}
Save Humanity Python Solution
#!/bin/python3
import os
import sys
#
# Complete the virusIndices function below.
#
def binSearch(smaller, bigger):
lo = 0
hi = len(smaller)
# binary search for prefix
while lo < hi:
# +1 for even lengths
mid = ((hi - lo + 1) // 2) + lo
if smaller[:mid] == bigger[:mid]:
# prefixes equal
lo = mid
else:
# prefixes not equal
hi = mid - 1
return lo
def virusIndices(p, v):
#
# Print the answer for this test case in a single line
#
'''
This is about pattern searching, an important problem in computer science.
'''
txt = p
pat = v
N = len(txt)
M = len(pat)
err_allowance = 1
matches=[]
debug = False
if debug: print(f"N {N} M {M}")
txt_run_starts = [] #int run_start
txt_run_ch = [] #str ch
'''
txt_run_matches_pat = [] #bool matches_pat
ch_pat_match = {} # str ch : bool matches_pat
for i in range(26):
ch = chr(ord("a") + i)
ch_pat_match[ch] = pat.count(ch) >= M-err_allowance
'''
i = 0
while i < N:
ch = txt[i]
streak_start = i
i += 1
while i < N and txt[i] == ch:
i += 1
txt_run_starts.append(streak_start)
txt_run_ch.append(ch)
#txt_run_matches_pat.append(ch_pat_match[ch])
txt_run_starts.append(N)
if debug: print(f"len(txt_run_starts) {len(txt_run_starts)} N {N}")
txt_run_lens = [txt_run_starts[i]-txt_run_starts[i-1] for i in range(1,len(txt_run_starts))]
if debug: print(f"max txt_run_lens {max(txt_run_lens)}")
##print(f"txt_run_starts\n{txt_run_starts}")
pat_run_starts = [] #int
pat_run_ch = [] #str ch
j = 0
while j < M:
ch = pat[j]
streak_start = j
j += 1
while j < M and pat[j] == ch:
j += 1
pat_run_starts.append(streak_start)
pat_run_ch.append(ch)
pat_run_starts.append(M)
if debug: print(f"len(pat_run_starts) {len(pat_run_starts)} M {M}")
pat_run_lens = [pat_run_starts[i]-pat_run_starts[i-1] for i in range(1,len(pat_run_starts))]
if debug: print(f"max pat_run_lens {max(pat_run_lens)}")
##print(f"pat_run_starts\n{pat_run_starts}")
# KMP preprocessing:
lps = [0]*M
disable_KMP = True
#LPS example: ABXAB gives lps of [0 0 0 1 2]
#Search examples using this pattern with error_allowance == 1
#txt: ABXABXABX
#pat: ABXAB
#matches:
# match found at i-j=0 with 0 errors with j=0..5, lps[j-1]=2
# match found at i-j=3 with 0 errors with j=2..5, lps[j-1]=2
# match found at i-j=5 with 0 errors with j=2..5, lps[j-1]=2
# exit on i-j=5 which has reached N-M+1=9-5+1=5
#txt: ABCABCDBXAB
#pat: ABXAB
# matches found at 0, 3, 6
if disable_KMP:
pass
elif max(pat_run_lens) < M ** 0.5:
# pat runs aren't very long - use standard preprocessing for KMP
for j in range(M):
k = 0
while k < j:
if pat[:k+1] != pat[j-k:j+1]:
break
k += 1
lps[j] = k
else:
# pat runs are sizable - use skip-ahead preprocessing for KMP
suffix_run_at_k_eq_0 = 0
for j in range(M):
if debug and j%5000==0: print(f"j {j}")
# check for degenerate case of all one character
if 0 < j < pat_run_lens[0]:
lps[j] = j
continue
if j >= pat_run_starts[suffix_run_at_k_eq_0 + 1]:
suffix_run_at_k_eq_0 += 1
# check for degenerate case where j has 2 bracketing runs of unequal length and identical character
if j > pat_run_lens[0]:
len_first_prefix_run = pat_run_lens[0]
len_last_suffix_run = j+1 - pat_run_starts[suffix_run_at_k_eq_0]
if len_first_prefix_run != len_last_suffix_run and pat_run_ch[0] == pat_run_ch[suffix_run_at_k_eq_0]:
lps[j] = min(len_first_prefix_run, len_last_suffix_run)
continue
# handle more general case of matching lists of prefix and suffix runs
prefix_runs = [0]
suffix_runs = [suffix_run_at_k_eq_0] # uses reverse order
k = 0
while k < j:
#if pat[:k+1] != pat[j-k:j+1]:
# break
if len(prefix_runs) != len(suffix_runs): # mismatch in number of runs in comparators
break
len_last_prefix_run = k+1 - pat_run_starts[prefix_runs[-1]]
len_last_suffix_run = j+1 - max(j-k, pat_run_starts[suffix_runs[0]])
if len_last_prefix_run != len_last_suffix_run or pat_run_ch[prefix_runs[-1]] != pat_run_ch[suffix_runs[0]]:
# special case comparison for tail stub of prefix and tail stub of suffix
break
if len(prefix_runs)>1:
len_first_prefix_run = min(k+1, pat_run_lens[prefix_runs[0]])
len_first_suffix_run = min(j+1, pat_run_starts[suffix_runs[-1] + 1]) - (j-k)
if len_first_prefix_run != len_first_suffix_run or pat_run_ch[prefix_runs[0]] != pat_run_ch[suffix_runs[-1]]:
# special case comparison for head of prefix (not nec. a stub) head stub of suffix
break
for i in range(1, len(prefix_runs) - 1):
# compare all intermediate runs of prefix and suffix (length and run character)
if pat_run_lens[prefix_runs[i]] != pat_run_lens[suffix_runs[-i-1]] or pat_run_ch[prefix_runs[i]] != pat_run_ch[suffix_runs[-i-1]]:
break
k += 1
if k >= pat_run_starts[prefix_runs[-1] + 1]:
prefix_runs.append(prefix_runs[-1] + 1)
if j-k < pat_run_starts[suffix_runs[-1]]:
suffix_runs.append(suffix_runs[-1] - 1)
lps[j] = k
debuggy = 0
#if False: # DEBUG - force skip-ahead branch
if max(txt_run_lens) < N ** 0.5 and max(pat_run_lens) < M ** 0.5:
#runs aren't very long - try naive approach or KMP
# KMP pattern search with error allowance:
i, j = 0, 0
err_count = 0
if disable_KMP:
use_block_compare = True
if not use_block_compare:
# naive algorithm
while i-j <= N-M:
got_match = txt[i]==pat[j]
if got_match or err_count < err_allowance:
i, j = i+1, j+1
if j==M:
matches.append(i-j)
i, j = i-j+1, 0
err_count=0
elif not got_match:
err_count+=1
else:
i, j = i-j+1, 0
err_count=0
else:
# naive algorithm, but using block compare of strings
block_size = 64
while i-j <= N-M:
while i+block_size<=N and j+block_size<=M and txt[i:i+block_size]==pat[j:j+block_size]:
i += block_size
j += block_size
if j==M:
matches.append(i-j)
i, j = i-j+1, 0
err_count=0
i_start = i
while (i==i_start or i % block_size != 0) and i-j <= N-M:
got_match = txt[i]==pat[j]
if got_match or err_count < err_allowance:
i, j = i+1, j+1
if j==M:
matches.append(i-j)
i, j = i-j+1, 0
err_count=0
elif not got_match:
err_count+=1
else:
i, j = i-j+1, 0
err_count=0
else:
'''
# KMP algorithm - no error allowance
while i-j <= N-M:
if txt[i]==pat[j]:
i, j = i+1, j+1
if j==M:
matches.append[i-j]
j=lps[j-1]
elif j==0:
i = i+1
else:
j=lps[j-1]
'''
# KMP algorithm
while i-j <= N-M:
got_match = txt[i]==pat[j]
if got_match or err_count < err_allowance:
i, j = i+1, j+1
if not got_match:
err_count+=1
if j==M:
matches.append(i-j)
if err_count > 0 or disable_KMP:
i = i-j + 1
j = 0
err_count = 0
else:
j=lps[j-1]
else:
i = i-j + 1
j = 0
err_count = 0
else:
#there are some runs of significant length - try optimizing for this
# KMP algorithm with err_allowance and skip-ahead logic
i, j = 0, 0
err_count = 0
i_txt_run, j_pat_run = 0, 0
while i-j <= N-M:
while True:
txt_run_len = txt_run_starts[i_txt_run + 1] - i
pat_run_len = pat_run_starts[j_pat_run + 1] - j
if txt_run_ch[i_txt_run] != pat_run_ch[j_pat_run]:
break
max_run_len = min(txt_run_len, pat_run_len)
skip_len = min(M-j, max_run_len)
i, j = i+skip_len, j+skip_len
if txt_run_len == skip_len:
i_txt_run += 1
if pat_run_len == skip_len:
j_pat_run += 1
if j==M:
matches.append(i-j)
if err_count > 0 or disable_KMP:
i = i-j + 1
j = 0
err_count = 0
while i < txt_run_starts[i_txt_run]:
i_txt_run -= 1
j_pat_run = 0
else:
j=lps[j-1]
while j < pat_run_starts[j_pat_run]:
j_pat_run -= 1
if i-j>N-M:
break
if i-j>N-M:
break
# we have a mismatch
if err_count < err_allowance:
i, j = i+1, j+1
err_count+=1
if i>= txt_run_starts[i_txt_run + 1]:
i_txt_run += 1
if j==M:
matches.append(i-j)
if err_count > 0 or disable_KMP:
i = i-j + 1
j = 0
err_count = 0
while i < txt_run_starts[i_txt_run]:
i_txt_run -= 1
j_pat_run = 0
else:
j=lps[j-1]
while j < pat_run_starts[j_pat_run]:
j_pat_run -= 1
else:
if j>= pat_run_starts[j_pat_run + 1]:
j_pat_run += 1
else:
i = i-j + 1
j = 0
err_count = 0
while i < txt_run_starts[i_txt_run]:
i_txt_run -= 1
j_pat_run = 0
print(*matches) if len(matches)>0 else print("No Match!")
if __name__ == '__main__':
global count_of_target_M_cases
count_of_target_M_cases = 0
sys_stdin_orig = sys.stdin
if len(sys.argv) > 1 and sys.argv[1]=="<":
sys.stdin = open(sys.argv[2], "r")
t = int(input())
for t_itr in range(t):
pv = input().split()
p = pv[0]
v = pv[1]
virusIndices(p, v)