HackerRank Bear and Steady Gene Solution
In this post, we will solve HackerRank Bear and Steady Gene Problem Solution.
A gene is represented as a string of length n (where n is divisible by 4), composed of the letters A, C, T. and G. It is considered to be steady if each of the four letters occurs exactly times. For example, GACT and AAGTGCCT are both steady genes.
Bear Limak is a famous biotechnology scientist who specializes in modifying bear DNA to make it steady. Right now, he is examining a gene represented as a string gene. It is not necessarily steady. Fortunately, Limak can choose one (maybe empty) substring of gene and replace it with any string of the same length.
Modifying a large substring of bear genes can be dangerous. Given a string gene, can you help Limak find the length of the smallest possible substring that he can replace to make gene a steady gene?
Note: A substring of a string s is a subsequence made up of zero or more contiguous characters of $.
As an example, consider gene = ACTGAAAG. The substring AA just before or after G can be replaced with CT or TC. One selection would create ACTGACTG.
Function Description
Complete the steadyGene function in the editor below. It should return an integer that represents the length of the smallest substring to replace.
steadyGene has the following parameter:
- gene: a string
Input Format
The first line contains an interger ʼn divisible by 4, that denotes the length of a string gene.
The second line contains a string gene of length n.
Output Format
Print the length of the minimum length substring that can be replaced to make gene stable.
Sample Input
8
GAAATAAA
Sample Output
5
Explanation
One optimal solution is to replace AAATA with TTCCG resulting in GTTCCGAA.
The replaced substring has length 5.
Bear and Steady Gene C Solution
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stdlib.h>
int is_balanced(int *deviation, int *l_idx, int *r_idx) {
int i = 0;
while (i < 4 && deviation[i] <= 0 || (deviation[i] > 0 && r_idx[i] - l_idx[i] >= deviation[i]))
i++;
return i == 4;
}
int solve(int *si, int *so, int n, int *deviation) {
if (deviation[0] == 0 && deviation[1] == 0 && deviation[2] == 0 && deviation[3] == 0) return 0;
int l = 0, r = 0;
int l_idx[] = {0, 0, 0, 0};
int r_idx[] = {0, 0, 0, 0};
int min_len = 10e+7;
while (r < n) {
while (r < n && !is_balanced(deviation, l_idx, r_idx))
r_idx[si[r]] = so[r++];
while (is_balanced(deviation, l_idx, r_idx))
l_idx[si[l]] = so[l++];
if (r - l + 1 < min_len)
min_len = r - l + 1;
//printf("(%d,%d):", l, r); for (int i = 0; i < 4; i++) printf(" (%d,%d)", l_idx[i], r_idx[i]); printf("\n");
}
return min_len;
}
int main() {
int n;
scanf("%d", &n);
int n_forth = n / 4;
char *s = malloc(sizeof(char) * n);
int *si = malloc(sizeof(int) * n);
int *so = malloc(sizeof(int) * n);
scanf("%s", s);
int freq[] = {0, 0, 0, 0};
for (int i = 0; i < n; i++) {
int j = 0;
switch (s[i]) {
case 'C': j = 1; break;
case 'T': j = 2; break;
case 'G': j = 3; break;
}
si[i] = j;
freq[j]++;
so[i] = freq[j];
}
int deviation[] = {0, 0, 0, 0};
for (int i = 0; i < 4; i++) {
deviation[i] = freq[i] - n_forth;
}
//printf("n_forth: %d, freq:", n_forth); for (int i = 0; i < 4; i++) printf(" %d(%d)", freq[i], deviation[i]); printf("\n");
//printf("si:"); for (int i = 0; i < n; i++) printf(" %d", si[i]); printf("\n");
//printf("so:"); for (int i = 0; i < n; i++) printf(" %d", so[i]); printf("\n");
printf("%d", solve(si, so, n, deviation));
free(s);
free(si);
free(so);
return 0;
}
Bear and Steady Gene C++ Solution
#include <cmath>
#include <cstdio>
#include <array>
#include <vector>
#include <iostream>
#include <queue>
#include <climits>
#include <algorithm>
using namespace std;
int Calc(string s){
int a=0,g=0,t=0,c=0;
for(const auto& elem : s){
switch(elem){
case 'A': a++; break;
case 'G': g++;break;
case 'T': t++;break;
case 'C': c++;break;
}
}
// cout << a << " "<< g << " " << t << " " <<c<< endl;
vector<int> num_to_eliminate(256,0);
int required_num_letters = s.size() / 4;
num_to_eliminate['A'] = max(a - required_num_letters, 0);
num_to_eliminate['G'] = max(g - required_num_letters, 0);
num_to_eliminate['T'] = max(t - required_num_letters, 0);
num_to_eliminate['C'] = max(c - required_num_letters, 0);
int total_over = accumulate(num_to_eliminate.begin(), num_to_eliminate.end(),0);
if (total_over == 0)
return 0;
// cout << "total_over" << total_over <<endl;
bool contains_enough = false;
int best_len = s.size();
vector<int> subseq_count(256,0);
deque<char> dq;
int subseq_total_count = 0;
for(int i = 0 ; i < s.size() ; ++i){
char curr = s.at(i);
dq.push_front(curr);
if (subseq_count[curr] < num_to_eliminate[curr]){
subseq_total_count++;
// cout << "subseq_total_count" << subseq_total_count<<endl;
if (subseq_total_count == total_over){
// finally full
contains_enough = true;
// cout << "found all " << endl;
}
}
subseq_count[curr]++;
while(!dq.empty()){
// try removing the last one
char last_item = dq.back();
if (subseq_count[last_item] > num_to_eliminate[last_item]){
dq.pop_back();
subseq_count[last_item]--;
}
else{
break;
}
}
if (contains_enough){
best_len = min(best_len, (int)dq.size());
}
/*
if (subseq_count[curr] < num_to_eliminate[curr]){
if (subseq_start == -1){
subseq_start = i; // starting new subsequence
}
subseq_total_count++;
subseq_count[curr]++;
if (subseq_total_count == total_over){
int curr_len = i - subseq_start;
best_len = max(best_len, curr_len);
}
}*/
}
return best_len;
}
int main() {
int n;
cin >> n;
string s;
cin >> s;
int result = Calc(s);
cout << result << endl;
return 0;
}
Bear and Steady Gene C Sharp Solution
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
public class Solver
{
/*
* Julia worked through the code execution, then, wrote down the steps using test case:
*
* GAAATAAA
*
* var a = [3, 0,0,0,2, 0, 0, 0]
* Target[] = [4, 0, 0, 0]
*
* If all element in Target == 0, then return 0
*
* Left, right two pointer
Start a loop,
Add a[r]’s char into Sum array,
r++, r moves to next one
inside the first loop, another loop for r, continue to move forward if sums not reaches target. – 4 of them >= target
*
inside the first loop, another loop for left pointer – l
condition: l < r
if sums array is bigger than target array, then remove left pointer char count, then, move left pointer to next one
then, the substring is the reaching the target, and compare to existing smallest length
*
Julia's comment: the code is more readable and quick to write
*/
public void Solve()
{
int n = ReadInt();
var a = ReadToken().Select(ch => "ACTG".IndexOf(ch)).ToArray(); // var a = [3, 0,0,0,2, 0, 0, 0]
var target = new int[4];
for (int i = 0; i < 4; i++)
target[i] = Math.Max(0, a.Count(aa => aa == i) - n / 4);
if (target.All(t => t == 0))
{
Write(0);
return;
}
int ans = n;
int l = 0, r = 0;
var sums = new int[4];
while (r < n)
{
sums[a[r]]++;
r++;
while (r < n && !(sums[0] >= target[0] && sums[1] >= target[1] && sums[2] >= target[2] && sums[3] >= target[3]))
{
sums[a[r]]++;
r++;
}
while (l < r)
{
if (!(sums[0] >= target[0] && sums[1] >= target[1] && sums[2] >= target[2] && sums[3] >= target[3]))
break;
sums[a[l]]--;
l++;
}
ans = Math.Min(ans, r - l + 1);
}
Write(ans);
}
#region Main
protected static TextReader reader;
protected static TextWriter writer;
static void Main()
{
reader = new StreamReader(Console.OpenStandardInput());
writer = new StreamWriter(Console.OpenStandardOutput());
//reader = new StreamReader("input.txt");
//writer = new StreamWriter("output.txt");
try
{
//var thread = new Thread(new Solver().Solve, 1024 * 1024 * 128);
//thread.Start();
//thread.Join();
new Solver().Solve();
}
catch (Exception ex)
{
Console.WriteLine(ex);
#if DEBUG
#else
throw;
#endif
}
reader.Close();
writer.Close();
}
#endregion
#region Read / Write
private static Queue<string> currentLineTokens = new Queue<string>();
private static string[] ReadAndSplitLine() {
return reader.ReadLine().Split(new[] { ' ', '\t', },
StringSplitOptions.RemoveEmptyEntries);
}
public static string ReadToken() {
while (currentLineTokens.Count == 0)
currentLineTokens = new Queue<string>(ReadAndSplitLine());
return currentLineTokens.Dequeue();
}
public static int ReadInt() {
return int.Parse(ReadToken());
}
public static long ReadLong() { return long.Parse(ReadToken()); }
public static double ReadDouble() { return double.Parse(ReadToken(), CultureInfo.InvariantCulture); }
public static int[] ReadIntArray() { return ReadAndSplitLine().Select(int.Parse).ToArray(); }
public static long[] ReadLongArray() { return ReadAndSplitLine().Select(long.Parse).ToArray(); }
public static double[] ReadDoubleArray() { return ReadAndSplitLine().Select(s => double.Parse(s, CultureInfo.InvariantCulture)).ToArray(); }
public static int[][] ReadIntMatrix(int numberOfRows) { int[][] matrix = new int[numberOfRows][]; for (int i = 0; i < numberOfRows; i++)matrix[i] = ReadIntArray(); return matrix; }
public static int[][] ReadAndTransposeIntMatrix(int numberOfRows)
{
int[][] matrix = ReadIntMatrix(numberOfRows); int[][] ret = new int[matrix[0].Length][];
for (int i = 0; i < ret.Length; i++) { ret[i] = new int[numberOfRows]; for (int j = 0; j < numberOfRows; j++)ret[i][j] = matrix[j][i]; } return ret;
}
public static string[] ReadLines(int quantity) { string[] lines = new string[quantity]; for (int i = 0; i < quantity; i++)lines[i] = reader.ReadLine().Trim(); return lines; }
public static void WriteArray<T>(IEnumerable<T> array) { writer.WriteLine(string.Join(" ", array)); }
public static void Write(params object[] array) { WriteArray(array); }
public static void WriteLines<T>(IEnumerable<T> array) { foreach (var a in array)writer.WriteLine(a); }
private class SDictionary<TKey, TValue> : Dictionary<TKey, TValue>
{
public new TValue this[TKey key]
{
get { return ContainsKey(key) ? base[key] : default(TValue); }
set { base[key] = value; }
}
}
private static T[] Init<T>(int size) where T : new() { var ret = new T[size]; for (int i = 0; i < size; i++)ret[i] = new T(); return ret; }
#endregion
}
Bear and Steady Gene Java Solution
import java.io.*;
import java.util.*;
public class Solution {
public static void main(String[] args) {
/* Enter your code here. Read input from STDIN. Print output to STDOUT. Your class should be named Solution. */
Scanner in = new Scanner(System.in);
int n = in.nextInt();
String s = in.next();
String genes = "ATGC";
int [] cnt = new int[4];
int left = 0;
for(int i=0;i<n;i++){
int cur = genes.indexOf(s.charAt(i));
if(cnt[cur] + 1 > n / 4) {left = i-1; break;}
cnt[cur] ++ ;
}
if(left == 0){
System.out.println(0);
return;
}
int res = n;
int right = n-1;
for(int i = left; i >= 0; i--){
int cur;
while(right>0){
cur = genes.indexOf(s.charAt(right));
if(cnt[cur] + 1 > n/4) break;
cnt[cur]++;
right -- ;
}
cur = genes.indexOf(s.charAt(i));
cnt[cur] -- ;
res = Math.min(res, right-i);
}
System.out.println(res);
}
}
Bear and Steady Gene JavaScript Solution
function steadyGene(group, limit){
return group['A'] <= limit && group['T'] <= limit && group['C'] <= limit && group['G'] <= limit;
}
function processData(input) {
var n = parseInt(input[0]),
limit = n / 4,
s = input[1],
group = {
A: 0,
T: 0,
C: 0,
G: 0
};
var maxIndex = null;
// Find the rightmost index until the chain remains steady.
for(var i = n - 1; i >= 0; i--){
group[s[i]]++;
if(!steadyGene(group, limit)){
group[s[i]]--;
maxIndex = i + 1;
break;
}
}
var minSubstring = null, substringLen;
// Find the leftmost index and minimum Substring.
find: for(var minIndex = 0; minIndex < maxIndex; minIndex++){
group[s[minIndex]]++;
while(!steadyGene(group, limit)){ // While not steadygene...
if(maxIndex >= n){ // if maxIndex goes over the s.length, break all process
break find;
}
group[s[maxIndex]]--; // decrement until find the next steadygene.
maxIndex++;
}
substringLen = maxIndex - minIndex - 1;
if(minSubstring === null || substringLen < minSubstring){
minSubstring = substringLen;
}
}
console.log(minSubstring || 0);
}
var _input = "";
process.stdin.resume();
process.stdin.setEncoding("ascii");
process.stdin.on("data", function (input) {
_input += input;
});
process.stdin.on("end", function () {
processData(_input.split('\n'));
});
Bear and Steady Gene Python Solution
toidx = {'A': 0, 'C': 1, 'T': 2, 'G': 3}
n = int(input())
text = input().strip()
limit = n // 4
def check_limit(arr, limit):
return arr[0] <= limit and arr[1] <= limit and arr[2] <= limit and arr[3] <= limit
counts = [0] * 4
i = 0
for v in text:
idx = toidx[v]
counts[idx] += 1
if not check_limit(counts, limit):
counts[idx] -= 1
break
i += 1
if i == n:
print(0)
else:
i -= 1
result = n - i - 1
j = n - 1
while j >= 0:
idx = toidx[text[j]]
counts[idx] += 1
while i >= 0 and not check_limit(counts, limit):
idx_i = toidx[text[i]]
counts[idx_i] -= 1
if i == 0:
break
i -= 1
if not check_limit(counts, limit):
counts[idx] -= 1
j += 1
break
_result = j - i - 1
if _result < result:
result = _result
if i == 0:
break
j -= 1
_result = j - i - 1
if _result < result:
result = _result
print(result)
Other Solutions