HackerRank Find Strings Problem Solution
In this post, we will solve HackerRank Find Strings Problem Solution.
A substring is defined as a contiguous sequence of one or more characters in the string. More information on substrings can be found here.
You are given n strings w[1], [2],…….. w[n]. Let S[i] denote the set of all distinct substrings of the string w[i]. Let S = {S[1] U S[2]U…….. S[n]}, that is, S is a set of strings that is the union of all substrings in all sets S[1], S[2],….. S[n]. There will be many queries. For each query you will be given an integer ‘k’. Your task is to find the kth element of the 1-indexed lexicographically ordered set of substrings in the set S. If there is no element k, return
INVALID.
For example, your strings are w = [abc, cde]. All of the substrings are
S[1] = {a, b, c, ab, bc, abc} and S[2] = {c, d, e, cd, de, cde}. Combine the two sets and sort them to get S = {a, ab, abc, b, bc, c, cd, cde, d, de, e}. So, for instance if k = 1, we return ‘a’. If k = 5, we return ‘bc’. If k = 20 though, there is not an S[20] so we return
INVALID.
Function Description
Complete the findStrings function in the editor below. It should return array of strings.
findStrings has the following parameter(s):
- w: an array of strings
- queries: an array of integers
Input Format
The first line contains an integer n, the number of strings in the array w.
Each of the next n lines consists of a string w[i].
The next line contains an integer q, the number of queries.
Each of the next q lines consists of a single integer k.
Output Format
Return an array of q strings where the ith string is the answer to the ith query. If a k is invalid, return “INVALID” for that case.
Sample Input
2
aab
aac
3
3
8
23
Sample Output
aab
c
INVALID
Explanation
For the sample test case, we have 2 strings “aab” and “aac”.
S1 = {“a”, “aa”, “aab”, “ab”, “b”} . These are the 5 unique substrings of “aab”.
S2 = {“a”, “aa”, “aac”, “ac”, “c” } . These are the 5 unique substrings of “aac”.
Now, S = {S1 U S2} = {“a”, “aa”, “aab”, “aac”, “ab”, “ac”, “b”, “c”}. Totally, 8 unique strings are present in the set S.
The lexicographically 3rd smallest string in S is “aab” and the lexicographically 8th smallest string in S is “c”. Since there are only 8 distinct substrings, the answer to the last query is “INVALID”.
Find Strings C Solution
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
char input[3000], output[3000];
int queries[600];
int malloc_cnt = 0;
//HASH table for ints implementation
typedef struct bucket_item{
int key;
char *val;
struct bucket_item *next;
} HASH_NODE;
#define HASH_MODULUS 997
HASH_NODE* hash_arr[HASH_MODULUS];
int calc_hash(int i) {
return (i % HASH_MODULUS);
}
HASH_NODE *get_hash_table_entry(int i) {
int hash = calc_hash(i);
HASH_NODE *item = hash_arr[hash];
if(item == NULL) { return NULL; }
while(item) {
if(item->key == i) { return item; }
item = item->next;
}
return NULL;
}
void insert_into_hash_table(int i) {
if(get_hash_table_entry(i) != NULL) { return; }
int hash = calc_hash(i);
HASH_NODE *new_node = malloc(sizeof(HASH_NODE));
new_node->key = i;
new_node->val = malloc(sizeof(char) * 2200);
new_node->next = NULL;
strcpy(new_node->val, "INVALID");
if(hash_arr[hash] == NULL) {
hash_arr[hash] = new_node;
} else {
new_node->next = hash_arr[hash];
hash_arr[hash] = new_node;
}
}
void print_keys_in_hash_table() {
int i;
HASH_NODE *tmp;
printf("HASH TABLE ENTRIES\n");
for(i = 0; i < HASH_MODULUS; i++) {
tmp = hash_arr[i];
while(tmp) {
printf("%d\n", tmp->key);
tmp = tmp->next;
}
}
printf("HASH TABLE ENTRIES OVER\n");
}
//
typedef struct node{
char *word;
int len;
struct node *children[26];
}NODE;
typedef void (*TRAVERSE_FUNC) (char *, int);
NODE* tree_root;
//return the length of the largest common prefix of str1, str2
int prefix_match_cnt(char * str1, char *str2, int len1, int len2) {
int i = 0;
while((i < len1) && (i < len2) && (*str1++ == *str2++)) {
i++;
}
return i;
}
NODE* create_node(char *str, int len) {
int i;
NODE *tmp = malloc(sizeof(NODE));
tmp->word = str;
tmp->len = len;
for(i = 0; i < 26; i++) {
tmp->children[i] = NULL;
}
malloc_cnt++;
return tmp;
}
void add_suffix_string_to_tree(NODE *node, NODE *par_node, char *str, int len) {
int i, match_cnt;
NODE* new_node;
if(len == 0) { return; }
if(node->len != -1) { //node->len = -1 ==> root
match_cnt = prefix_match_cnt(str, node->word, len, node->len);
if(match_cnt == len) { return; } //The string is already present in the tree. return
//we are here ==> (match_cnt < len), (match_cnt <= node->len).
str = &str[match_cnt];
len -= match_cnt;
if(match_cnt < node->len) {
//split the current node with match_cnt length
new_node = create_node(node->word, match_cnt);
par_node->children[node->word[0] - 'a'] = new_node;
node->word += match_cnt;
node->len -= match_cnt;
new_node->children[node->word[0] - 'a'] = node;
node = new_node;
}
}
i = (str[0] - 'a');
if(node->children[i] == NULL) {
node->children[i] = create_node(str, len);
} else {
add_suffix_string_to_tree(node->children[i], node, str, len);
}
}
void add_all_suffixes_to_tree(NODE *root, char *str) {
int i, len = strlen(str);
for(i = 0; i < len; i++) {
add_suffix_string_to_tree(root, NULL, &str[i], (len - i));
}
}
void traverse_all_substrings(NODE *node, char *out, int len, TRAVERSE_FUNC func) {
int i;
if(node == NULL) { return; }
if(node->len == -1) {
for(i = 0; i < 26; i++) {
traverse_all_substrings(node->children[i], out, len, func);
}
} else {
strncpy(&out[len], node->word, node->len);
for(i = 0; i < node->len; i++) {
func(out, len + (i + 1));
}
for(i = 0; i < 26; i++) {
traverse_all_substrings(node->children[i], out, len + (node->len), func);
}
}
}
void print_substring(char *str, int len) {
char c = str[len];
str[len] = '\0';
printf("%s\n", str);
str[len] = c;
}
int total_substr_cnt = 0;
void cnt_substrings(char *str, int len) {
total_substr_cnt++;
}
int substr_cnt = 0;
void handle_substring(char *str, int len) {
char c = str[len];
HASH_NODE *item;
substr_cnt++;
item = get_hash_table_entry(substr_cnt);
if(item) {
str[len] = '\0';
strcpy(item->val, str);
str[len] = c;
return;
}
}
char* chomp(char * str) {
int len = strlen(str);
if((len > 0) && (str[len - 1] == '\n')) { str[len - 1] = '\0'; }
if((len > 1) && (str[len - 2] == '\r')) { str[len - 2] = '\0'; }
return str;
}
char* convert_to_lower(char* str) {
int len = strlen(str), i;
for(i = 0; i < len; i++) {
str[i] = tolower(str[i]);
}
return str;
}
FILE *file_open(char *filename, char *mode) {
FILE *fp = fopen(filename, mode);
if(fp == NULL) {
perror("file_open");
exit(0);
}
return fp;
}
int main(int argc, char **argv) {
int num_words, num_queries, tmp;
char *str;
int query_idx = 0, idx;
HASH_NODE *item;
FILE *fp = stdin;
if(argc > 1) {
fp = file_open(argv[1], "r");
}
fgets(input, 3000, fp);
num_words = tmp = atoi(input);
// printf(":%d:\n", num_words);
tree_root = create_node(NULL, -1); //Root marker
while(tmp > 0) {
tmp--;
fgets(input, 3000, fp);
chomp(input);
convert_to_lower(input);
//printf("Adding \"%s\" ....", input);
str = malloc(sizeof(char) * 3000);
strcpy(str, input);
add_all_suffixes_to_tree(tree_root, str);
//printf("Done\n");
}
total_substr_cnt = 0;
traverse_all_substrings(tree_root, output, 0, cnt_substrings);
/* printf("OUTPUT:\n____________________\n");
traverse_all_substrings(tree_root, output, 0, print_substring);
printf("\n____________________\n");*/
fgets(input, 3000, fp);
num_queries = tmp = atoi(input);
// printf(":%d:\n", num_queries);
while(tmp > 0) {
tmp--;
fgets(input, 3000, fp);
chomp(input);
// printf("%s: ", input);
queries[query_idx] = atoi(input);
insert_into_hash_table(queries[query_idx]);
query_idx++;
}
// print_keys_in_hash_table();
// printf("total_substr_cnt: %d\n", total_substr_cnt);
substr_cnt = 0;
traverse_all_substrings(tree_root, output, 0, handle_substring);
for(idx = 0; idx < query_idx; idx++) {
item = get_hash_table_entry(queries[idx]);
printf("%s\n", item->val);
}
//Free all the memory if you have free time ;)
// printf("Malloc Count: %d \n", malloc_cnt);
// getchar();
return 0;
}
Find Strings C++ Solution
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <cmath>
#include <string>
#include <cstring>
#include <cctype>
#include <algorithm>
#include <vector>
#include <bitset>
#include <queue>
#include <stack>
#include <utility>
#include <list>
#include <set>
#include <map>
using namespace std;
/****************SUFFIX ARRAY*********************/
#define MAXN 1000005
int n,t; //n es el tamaño de la cadena
int p[MAXN],r[MAXN],h[MAXN];
//p es el inverso del suffix array, no usa indices del suffix array ordenado
//h el el tamaño del lcp entre el i-esimo y el i+1-esimo elemento de suffix array ordenado
string s;
void fix_index(int *b, int *e) {
int pkm1, pk, np, i, d, m;
pkm1 = p[*b + t];
m = e - b; d = 0;
np = b - r;
for(i = 0; i < m; i++) {
if (((pk = p[*b+t]) != pkm1) && !(np <= pkm1 && pk < np+m)) {
pkm1 = pk;
d = i;
}
p[*(b++)] = np + d;
}
}
bool comp(int i, int j) {
return p[i + t] < p[j + t];
}
void suff_arr() {
int i, j, bc[256];
t = 1;
for(i = 0; i < 256; i++) bc[i] = 0; //alfabeto
for(i = 0; i < n; i++) ++bc[int(s[i])]; //counting sort inicial del alfabeto
for(i = 1; i < 256; i++) bc[i] += bc[i - 1];
for(i = 0; i < n; i++) r[--bc[int(s[i])]] = i;
for(i = n - 1; i >= 0; i--) p[i] = bc[int(s[i])];
for(t = 1; t < n; t *= 2) {
for(i = 0, j = 1; i < n; i = j++) {
while(j < n && p[r[j]] == p[r[i]]) ++j;
if (j - i > 1) {
sort(r + i, r + j, comp);
fix_index(r + i, r + j);
}
}
}
}
//Longest Common Preffix
void lcp() {
int tam = 0, i, j;
for(i = 0; i < n; i++)if (p[i] > 0) {
j = r[p[i] - 1];
while(s[i + tam] == s[j + tam]) ++tam;
h[p[i]] = tam;
if (tam > 0) --tam;
}
h[0] = 0;
}
/***************************************************/
int main(){
int numS=0; scanf("%d",&numS);
int res=numS;
string sTemp;
while(res--){
cin>>sTemp;
s +=sTemp+char(res+1);
}
n=s.size();
suff_arr();
lcp();
vector<int> l(n);
//Compute the l'
for (int i = n-1,acum=1; i >= 0; --i)
{
if(s[i]<'a')
acum=0;
l[p[i]]=acum;
acum++;
}
//for(int i=1;i<n;i++)cout<<"i -> "<<i<<" "<<s.substr(r[i])<<" "<<r[i]<<" "<<l[i]<<endl;
//Queries
int q;scanf("%d",&q);
while(q--){
int k,i=1;
scanf("%d",&k);
for (; i < n; ++i)
{
int comp = l[i]-h[i];
if(comp>=k){
cout<<s.substr(r[i],h[i]+k)<<endl;
break;
}else{
k-=comp;
}
}
if(i==n)
cout<<"INVALID"<<endl;
}
return 0;
}
Find Strings C Sharp Solution
using System.CodeDom.Compiler;
using System.Collections.Generic;
using System.Collections;
using System.ComponentModel;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Runtime.Serialization;
using System.Text.RegularExpressions;
using System.Text;
using System;
class Result
{
/*
* Complete the 'findStrings' function below.
*
* The function is expected to return a STRING_ARRAY.
* The function accepts following parameters:
* 1. STRING_ARRAY w
* 2. INTEGER_ARRAY queries
*/
public static List<string> findStrings(List<string> w, List<int> queries)
{
string all = string.Join("^", w);
var suffixArray = (SuffixArrayFromSuffixTree)SuffixArray.Build(all, SuffixArrayAlgorithm.SuffixTree);
var result = new List<string>();
foreach (var q in queries)
{
result.Add(suffixArray.find(q));
}
return result;
}
}
class Solution
{
public static void Main(string[] args)
{
TextWriter textWriter = new StreamWriter(@System.Environment.GetEnvironmentVariable("OUTPUT_PATH"), true);
int wCount = Convert.ToInt32(Console.ReadLine().Trim());
List<string> w = new List<string>();
for (int i = 0; i < wCount; i++)
{
string wItem = Console.ReadLine();
w.Add(wItem);
}
int queriesCount = Convert.ToInt32(Console.ReadLine().Trim());
List<int> queries = new List<int>();
for (int i = 0; i < queriesCount; i++)
{
int queriesItem = Convert.ToInt32(Console.ReadLine().Trim());
queries.Add(queriesItem);
}
List<string> result = Result.findStrings(w, queries);
textWriter.WriteLine(String.Join("\n", result));
textWriter.Flush();
textWriter.Close();
}
}
public abstract class SuffixArray
{
#region Api
public abstract IComparer<char> Comparer { get; }
/// <summary>
/// Text
/// </summary>
public abstract string Text { get; }
/// <summary>
/// Length of suffix array
/// </summary>
public abstract int Length { get; }
/// <summary>
/// Start of the i'th suffix
/// </summary>
public abstract int this[int i] { get; }
/// <summary>
/// Get LCP value for i'th suffix
/// </summary>
public abstract int Lcp(int i);
/// <summary>
/// Enumerate all suffixes
/// </summary>
public abstract IEnumerable<ISuffix> GetStrings();
/// <summary>
/// Get ith substring in lexicographically ordered list of substrings
/// </summary>
public abstract String GetSubString(int index);
/// <summary>
/// Find all occurrences of a pattern
/// </summary>
public abstract IEnumerable<ISuffix> Find(string pattern);
#endregion
#region Construction
public static SuffixArray Build(string text, SuffixArrayAlgorithm algorithm)
{
return Build(text, Comparer<char>.Default, algorithm);
}
public static SuffixArray Build(string text, IComparer<char> comparer, SuffixArrayAlgorithm algorithm)
{
if (text == null)
{
throw new ArgumentNullException(nameof(text));
}
if (text.Contains(SuffixTree.TerminationCharacter))
{
throw new ArgumentException("Input contains termination character", nameof(text));
}
switch (algorithm)
{
case SuffixArrayAlgorithm.SuffixTree: // O(n)
return new SuffixTreeUkkonenLinear(text, comparer).GetSuffixArray();
default:
throw new NotImplementedException($"No implementation for algoritm {algorithm}");
}
}
#endregion
}
public interface ISuffix
{
int StartIndex { get; }
string Value { get; }
}
public enum SuffixArrayAlgorithm
{
Naive,
SuffixTree
}
public abstract class BurrowsWheelerTransform
{
#region Fields
protected internal static char EndCharacter = '|';
#endregion
#region Construction
public static string Calculate(string text, BurrowsWheelerTransformAlgorithm algorithm)
{
if (text == null)
{
throw new ArgumentNullException(nameof(text));
}
if (text.Contains(EndCharacter))
{
throw new ArgumentException("Input contains end character", nameof(text));
}
switch (algorithm)
{
case BurrowsWheelerTransformAlgorithm.Naive:
return CalculateNaive(text);
case BurrowsWheelerTransformAlgorithm.SuffixArray:
return CalculateWithSuffixArray(text, SuffixArrayAlgorithm.SuffixTree);
default:
throw new NotImplementedException($"No implementation for algoritm {algorithm}");
}
}
private static string CalculateWithSuffixArray(string text, SuffixArrayAlgorithm algorithm)
{
if (text == null)
{
throw new ArgumentNullException(nameof(text));
}
var input = $"{text}{EndCharacter}";
var suffixArray = SuffixArray.Build(input, SpecializedCharComparer.Instance, algorithm);
var result = new StringBuilder();
for (int i = 1; i < suffixArray.Length; ++i)
{
result.Append(suffixArray[i] > 0 ? input[suffixArray[i] - 1] : EndCharacter);
}
return result.ToString();
}
private static string CalculateNaive(string text)
{
var input = $"{text}{EndCharacter}";
var rotations = new List<string>();
for (int i = 0; i < input.Length; ++i)
{
var rotated = new StringBuilder();
for (int j = 0; j < input.Length; ++j)
{
rotated.Append(input[(i + j) % input.Length]);
}
rotations.Add(rotated.ToString());
}
rotations.Sort(new SpecializedStringComparer(SpecializedCharComparer.Instance));
var result = new StringBuilder();
for (int i = 0; i < input.Length; ++i)
{
result.Append(rotations[i][input.Length - 1]);
}
return result.ToString();
}
#endregion
}
public enum BurrowsWheelerTransformAlgorithm
{
Naive,
SuffixArray
}
public class SpecializedStringComparer : IComparer<string>
{
private readonly IComparer<char> comparer;
public SpecializedStringComparer(IComparer<char> charComparer)
{
this.comparer = charComparer;
}
public int Compare(string x, string y)
{
return Compare(x, 0, y, 0);
}
public int Compare(string x, int fromX, string y, int fromY)
{
var lenX = x.Length - fromX;
var lenY = y.Length - fromY;
var len = Math.Min(lenX, lenY);
var result = Compare(x, fromX, y, fromY, len);
if (result != 0)
{
return result;
}
if (lenX == lenY)
{
return 0;
}
else if (lenX > lenY)
{
return 1;
}
else
{
return -1;
}
}
public int Compare(string x, int fromX, string y, int fromY, int len)
{
if (x == null)
{
if (y == null)
{
return 0;
}
else
{
return -1;
}
}
else if (y == null)
{
return 1;
}
for (int i = 0; i < len; ++i)
{
var c = comparer.Compare(x[i + fromX], y[i + fromY]);
if (c != 0)
{
return c;
}
}
return 0;
}
}
public class SpecializedCharComparer : IComparer<char>
{
public static readonly SpecializedCharComparer Instance = new SpecializedCharComparer();
private SpecializedCharComparer()
{ }
public int Compare(char x, char y)
{
var cx = Map(x);
var cy = Map(y);
if (cx == cy)
{
return 0;
}
else if (cx > cy)
{
return 1;
}
else
{
return -1;
}
}
private long Map(char c)
{
if (c == SuffixTree.TerminationCharacter)
{
return -2;
}
if (c == BurrowsWheelerTransform.EndCharacter)
{
return -1;
}
return c;
}
}
public class SuffixTreeUkkonenLinear : SuffixTree
{
#region Fields
private static readonly int currentPosition = int.MinValue;
private readonly Node root;
private readonly string text;
private readonly IComparer<char> comparer;
#endregion
#region Properties
public IComparer<char> Comparer => comparer;
public string Text => text;
#endregion
#region Constructor
public SuffixTreeUkkonenLinear(string inputText)
: this(inputText, Comparer<char>.Default)
{ }
public SuffixTreeUkkonenLinear(string inputText, IComparer<char> charComparer)
{
text = inputText + TerminationCharacter;
comparer = charComparer;
root = Build(text);
}
public SuffixTreeUkkonenLinear(SuffixArray array)
{
text = array.Text + TerminationCharacter;
comparer = array.Comparer;
root = BuildFromArray(array);
}
#endregion
#region Api
public override bool IsMatch(string substring)
{
return Match(substring).Any();
}
public override IEnumerable<int> Match(string substring)
{
var node = Navigate(root, 0, substring.Length, substring, text, false, comparer);
if (!node.isFound)
{
yield break;
}
var stack = new Stack<Node>();
if (node.childIndex < 0)
{
stack.Push(node.parent);
}
else
{
stack.Push(node.parent.children[node.childIndex]);
}
while (stack.Count > 0)
{
var current = stack.Pop();
if (HasChildren(current))
{
foreach (var child in current.children)
{
stack.Push(child);
}
}
else
{
yield return current.pos;
}
}
}
#endregion
#region Class Api
public int[] GetZFunction()
{
var result = new int[text.Length - 1];
foreach (var item in GetSuffixesThatArePrefixes())
{
result[item.Item1] = item.Item2;
}
result[0] = 0;
return result;
}
private IEnumerable<ValueTuple<int, int>> GetSuffixesThatArePrefixes()
{
var node = root;
var k = 0;
var to = text.Length - 1;
while (k != to)
{
var childIndex = FindChild(text[k], node, text, comparer);
if (childIndex < 0)
{
throw new InvalidOperationException("BUG: Wrong tree");
}
var child = node.children[childIndex];
var skip = Math.Min(to - k, end(child, to + 1) - child.start);
k += skip;
if (k == to)
{
yield return new ValueTuple<int, int>(child.pos, k);
}
else if (child.start + skip == end(child, to + 1))
{
if (!HasChildren(child))
{
throw new InvalidOperationException("BUG: Wrong tree");
}
else
{
foreach (var subNode in Visit(child, text, comparer).Where(n => !HasChildren(n)))
{
yield return new ValueTuple<int, int>(subNode.pos, k);
}
}
node = child;
}
else
{
throw new InvalidOperationException("BUG: Wrong tree");
}
}
}
public IEnumerable<ValueTuple<int, int>> GetSuffixes()
{
var stack = new Stack<ValueTuple<Node, int>>();
stack.Push(new ValueTuple<Node, int>(root, 0));
var lcpStack = new Stack<int>();
var next = 0;
while (stack.Count > 0)
{
var current = stack.Pop();
var node = current.Item1;
var len = current.Item2;
if (len == -1)
{
lcpStack.Pop();
if (lcpStack.Count > 0)
{
next = lcpStack.Peek();
}
else
{
next = 0;
}
}
else if (HasChildren(node))
{
stack.Push(new ValueTuple<Node, int>(node, -1));
foreach (var child in node.children.OrderByDescending(c => text[c.start], comparer))
{
stack.Push(new ValueTuple<Node, int>(child, len + (node.end - node.start)));
}
lcpStack.Push(len + (node.end - node.start));
}
else
{
yield return new ValueTuple<int, int>(node.pos, next);
next = lcpStack.Peek();
}
}
}
public SuffixArray GetSuffixArray()
{
return new SuffixArrayFromSuffixTree(this);
}
private static IEnumerable<Node> Visit(Node root, string text, IComparer<char> comparer)
{
var stack = new Stack<Node>();
stack.Push(root);
while (stack.Count > 0)
{
var current = stack.Pop();
if (HasChildren(current))
{
foreach (var child in current.children.OrderByDescending(c => text[c.start], comparer))
{
stack.Push(child);
}
}
yield return current;
}
}
#endregion
#region Methods
private static Location Navigate(Node parent, int from, int to, string substring, string text, bool useSkipCount, IComparer<char> comparer)
{
var node = parent;
if (from == to)
{
return new Location(true, node, from, 0, -1);
}
// Navigate to the end of substring
var k = from;
while (true)
{
var childIndex = FindChild(substring[k], node, text, comparer);
if (childIndex < 0)
{
return new Location(false, node, k, 0, -1);
}
var child = node.children[childIndex];
var m = 0;
if (useSkipCount)
{
var skip = Math.Min(to - k, end(child, to + 1) - child.start);
m += skip;
k += skip;
}
else
{
while (child.start + m < end(child, to + 1) &&
k < to &&
comparer.Compare(text[child.start + m], substring[k]) == 0)
{
++m;
++k;
}
}
if (k == to)
{
return new Location(true, node, k, m, childIndex);
}
else if (child.start + m == end(child, to + 1))
{
if (!HasChildren(child))
{
return new Location(false, node, k, m, childIndex);
}
node = child;
}
else
{
return new Location(false, node, k, m, childIndex);
}
}
}
private static int end(Node node, int pos)
{
if (node.end == currentPosition)
return pos - 1;
return node.end;
}
private static int FindChild(char c, Node node, string text, IComparer<char> comparer)
{
if (node.children == null)
{
return -1;
}
for (int i = 0; i < node.children.Count; ++i)
{
var child = node.children[i];
if (comparer.Compare(text[child.start], c) == 0)
{
return i;
}
}
return -1;
}
private static bool HasChildren(Node node)
{
return node.children != null;
}
#endregion
#region Construction
private Node BuildFromArray(SuffixArray array)
{
//T0
var root = new Node
{
children = new List<Node>()
};
root.children.Add(new Node
{
pos = array[1],
start = array[1],
end = array.Length,
parent = root,
});
var lastLeaf = root.children[0];
var lastDepth = lastLeaf.end - lastLeaf.start;
for (int i = 2; i < array.Length; ++i)
{
var lcp = array.Lcp(i);
if (lcp == 0)
{
// No common prefix, start from the root
var child = new Node
{
pos = array[i],
start = array[i],
end = array.Length,
parent = root,
};
root.children.Add(child);
lastLeaf = child;
lastDepth = array.Length - array[i];
}
else
{
var lastPos = lastLeaf.pos;
while (lastDepth - (lastLeaf.end - lastLeaf.start) > lcp)
{
lastDepth = lastDepth - (lastLeaf.end - lastLeaf.start);
lastLeaf = lastLeaf.parent;
}
if (lastDepth - (lastLeaf.end - lastLeaf.start) == lcp)
{
var leaf2 = new Node
{
pos = array[i],
start = array[i] + lcp,
end = array.Length,
parent = lastLeaf.parent
};
lastLeaf.parent.children.Add(leaf2);
lastLeaf = leaf2;
lastDepth = array.Length - array[i];
}
else if (lastDepth > lcp)
{
var leaf1 = new Node
{
pos = lastLeaf.pos,
start = lastLeaf.end - (lastDepth - lcp),
end = lastLeaf.end,
parent = lastLeaf,
children = lastLeaf.children
};
var leaf2 = new Node
{
pos = array[i],
start = array[i] + lcp,
end = array.Length,
parent = lastLeaf
};
lastLeaf.pos = -1;
lastLeaf.end = lastLeaf.end - (lastDepth - lcp);
lastLeaf.children = new List<Node>() { leaf1, leaf2 };
lastLeaf = leaf2;
lastDepth = array.Length - array[i];
}
else
{
throw new Exception("What?");
}
}
}
return root;
}
private Node Build(string text)
{
var builder = new UkkonenBuilder(text, comparer);
return builder.Build();
}
private class UkkonenBuilder
{
private readonly string text;
private readonly IComparer<char> comparer;
private readonly Node root;
private Node activeLeaf;
private Node nodeThatRequireSuffixLink;
public UkkonenBuilder(string text, IComparer<char> charComparer)
{
this.text = text;
this.comparer = charComparer;
this.root = new Node
{
children = new List<Node>()
};
}
public Node Build()
{
activeLeaf = ConstructT(0);
var next = new Next(root, 1, 1, 0);
for (int i = 1; i < text.Length; ++i)
{
nodeThatRequireSuffixLink = null;
// Phase i+1
// So the first extension of any phase is special and only takes constant time since the algorithm
// has a pointer to the end of the current full string
// I.e. when j = 0
ApplyRule1(activeLeaf, i + 1);
while (next.j < i)
{
var location = Navigate(next.node, next.from, i, text, text, true, comparer);
next = ApplyRule(location, next.j, i);
if (next.rule == 3)
{
// Rule 3 stops current phase
break;
}
}
// Do not put TerminationCharacter to the tree
if (i < text.Length - 1)
{
// Extend empty suffix, by putting the next character to the tree
ConstructT(i);
}
}
foreach (var node in Visit(root, text, comparer))
{
if (node.end == currentPosition)
{
node.end = text.Length;
}
}
return root;
}
private struct Next
{
public Node node;
public int j;
public int from;
public int rule;
public Next(Node node, int j, int from, int rule)
{
this.node = node;
this.j = j;
this.from = from;
this.rule = rule;
}
}
private Next ApplyRule(Location location, int j, int i)
{
if (location.childIndex == -1)
{
ApplyRule2(location.parent, -1, location.offsetInEdge, location.offsetInString, j);
if (location.parent.suffixLink == null)
{
return new Next(root, j + 1, j + 1, 2);
}
else
{
return new Next(location.parent.suffixLink, j + 1, i, 2);
}
}
if (location.offsetInString != i)
{
throw new Exception("What?");
}
var child = location.parent.children[location.childIndex];
if (child.start + location.offsetInEdge == end(child, i + 1))
{
if (HasChildren(child))
{
if (FindChild(child.children, text[i]) >= 0)
{
ApplyRule3();
return new Next(location.parent,
j,
i - location.offsetInEdge,
3);
}
else
{
ApplyRule2(child, -1, 0, location.offsetInString, j);
if (child.suffixLink != null)
{
return new Next(child.suffixLink.parent,
j + 1,
i - (end(child.suffixLink, i + 1) - child.suffixLink.start),
2);
}
else if (location.parent.suffixLink == null)
{
return new Next(root, j + 1, j + 1, 2);
}
else
{
return new Next(location.parent.suffixLink,
j + 1,
i - (end(child, i + 1) - child.start),
2);
}
}
}
else
{
ApplyRule1(child, i + 1);
if (location.parent.suffixLink == null)
{
return new Next(root, j + 1, j + 1, 1);
}
else
{
return new Next(location.parent.suffixLink,
j + 1,
i - location.offsetInEdge,
1);
}
}
}
else
{
if (comparer.Compare(text[child.start + location.offsetInEdge], text[i]) == 0)
{
ApplyRule3();
return new Next(location.parent,
j,
i - location.offsetInEdge,
3);
}
else
{
ApplyRule2(location.parent, location.childIndex, location.offsetInEdge, location.offsetInString, j);
if (location.parent.suffixLink == null)
{
if (location.parent.parent != null && location.parent.parent.suffixLink != null)
{
return new Next(location.parent.parent.suffixLink,
j + 1,
i - location.offsetInEdge - (end(location.parent, i + 1) - location.parent.start),
2);
}
else
{
return new Next(root, j + 1, j + 1, 2);
}
}
else
{
return new Next(location.parent.suffixLink,
j + 1,
i - location.offsetInEdge,
2);
}
}
}
}
private Node ConstructT(int t)
{
var childIndex = FindChild(root.children, text[t]);
if (childIndex >= 0)
{
return root.children[childIndex];
}
var newNode = new Node
{
start = t,
end = currentPosition,
pos = t,
parent = root
};
root.children.Add(newNode);
return newNode;
}
private int FindChild(IList<Node> children, char c)
{
for (int i = 0; i < children.Count; ++i)
{
if (comparer.Compare(text[children[i].start], c) == 0)
{
return i;
}
}
return -1;
}
private void ApplyRule1(Node leaf, int newEnd)
{
//Rule 1. Path ends at a leaf. Extend implicitly
}
private void ApplyRule2(Node parent, int childIndex, int m, int k, int pos)
{
var newParent = default(Node);
if (childIndex >= 0)
{
//Rule 2. Split label and add new leaf
var child = parent.children[childIndex];
// 1) replace child with internal node
newParent = new Node
{
start = child.start,
end = child.start + m,
parent = parent,
suffixLink = null,
children = new List<Node>()
};
parent.children[childIndex] = newParent;
// 2) adjust start position of the child and add it to the new internal node as a child
child.start += m;
child.parent = newParent;
newParent.children.Add(child);
// Assign suffix link
{
if (nodeThatRequireSuffixLink != null)
{
if (nodeThatRequireSuffixLink.suffixLink != null)
{
throw new Exception("What?");
}
nodeThatRequireSuffixLink.suffixLink = newParent;
}
nodeThatRequireSuffixLink = newParent;
}
}
else
{
newParent = parent;
}
// 3) add the rest of the suffix as a new child
if (newParent.children == null)
{
newParent.children = new List<Node>();
}
newParent.children.Add(new Node
{
start = k,
end = currentPosition,
parent = newParent,
pos = pos
});
}
private void ApplyRule3()
{
//Rule 3. Suffix is already in the tree
// Do nothing
}
}
#endregion
#region Types
class Node
{
public int start;
public int end;
public Node parent;
// Leaf
public int pos;
// Internal
public Node suffixLink;
public IList<Node> children;
}
struct Location
{
public bool isFound;
public Node parent;
public int offsetInString;
public int offsetInEdge;
public int childIndex;
public Location(bool isFound, Node parent, int offsetInString, int offsetInEdge, int childIndex)
{
this.isFound = isFound;
this.parent = parent;
this.offsetInString = offsetInString;
this.offsetInEdge = offsetInEdge;
this.childIndex = childIndex;
}
}
#endregion
}
public abstract class SuffixTree
{
#region Fields
protected static internal char TerminationCharacter = '$';
#endregion
#region Api
public abstract bool IsMatch(string substring);
public abstract IEnumerable<int> Match(string substring);
#endregion
#region Construction
public static SuffixTree Build(string text, SuffixTreeAlgorithm algorithm)
{
if (text == null)
{
throw new ArgumentNullException(nameof(text));
}
if (text.Contains(TerminationCharacter))
{
throw new ArgumentException("Input contains termination character", nameof(text));
}
switch (algorithm)
{
case SuffixTreeAlgorithm.UkkonenLinear: // O(n)
return new SuffixTreeUkkonenLinear(text);
default:
throw new NotImplementedException($"No implementation for algoritm {algorithm}");
}
}
#endregion
}
public enum SuffixTreeAlgorithm
{
Naive,
UkkonenCubic,
UkkonenQuadratic,
UkkonenLinear,
UkkonenFast,
Weiner,
McCreight
}
public class SuffixArrayFromSuffixTree : SuffixArray
{
private readonly int[] index;
private readonly int[] lcp;
private readonly int[] strs;
private readonly string text;
private readonly IComparer<char> charComparer;
private readonly SpecializedStringComparer comparer;
public SuffixArrayFromSuffixTree(SuffixTreeUkkonenLinear tree)
{
//TODO: Think about terminator
this.text = tree.Text.Substring(0, tree.Text.Length - 1);
this.charComparer = tree.Comparer;
this.comparer = new SpecializedStringComparer(charComparer);
this.index = new int[text.Length + 1];
this.lcp = new int[text.Length + 1];
this.strs = new int[text.Length + 1];
index[0] = text.Length;
lcp[0] = int.MinValue;
strs[0] = 0;
var i = 1;
foreach (var suffix in tree.GetSuffixes())
{
int len = lenUntil(text, suffix.Item1);
index[i] = suffix.Item1;
lcp[i] = Math.Min(len, suffix.Item2);
strs[i] = strs[i - 1] + len - (i == 1 ? 0 : lcp[i]);
++i;
}
}
private int lenUntil(string text, int item1)
{
int l = 0;
while (item1 < text.Length && text[item1] != '^' && text[item1] != SuffixTree.TerminationCharacter)
{
l++;
item1++;
}
return l;
}
public String find(int i)
{
int start = Array.BinarySearch<int>(strs, i);
if (start < 0)
{
start = ~start;
}
for (int j = start; j < strs.Length; ++j)
{
if (strs[j] >= i)
{
int len = (i - strs[j - 1]) + lcp[j];
return text.Substring(index[j], len);
}
}
return "INVALID";
}
public override IComparer<char> Comparer => charComparer;
public override string Text => text;
public override int Length => index.Length;
public override int this[int i] => index[i];
public override int Lcp(int i)
{
return lcp[i];
}
public override IEnumerable<ISuffix> GetStrings()
{
// Empty suffix
for (int i=0; i<index.Length; ++i)
{
yield return new Suffix(this, index[i]);
}
}
public override String GetSubString(int index)
{
for (int i = 0; i < this.index.Length; ++i)
{
int l = 1;
while (text[l] != SuffixTree.TerminationCharacter)
{
}
}
return null;
}
public override IEnumerable<ISuffix> Find(string pattern)
{
if (string.IsNullOrEmpty(pattern))
{
throw new ArgumentNullException(nameof(pattern));
}
var l = 1;
var r = Length;
while (l < r)
{
var mid = (l + r) / 2;
var len = Math.Min(pattern.Length, text.Length - this[mid]);
if (comparer.Compare(pattern, 0, text, this[mid], len) > 0)
{
l = mid + 1;
}
else
{
r = mid;
}
}
var s = l;
r = Length;
while (l < r)
{
var mid = (l + r) / 2;
var len = Math.Min(pattern.Length, text.Length - this[mid]);
if (comparer.Compare(pattern, 0, text, this[mid], len) < 0)
{
r = mid;
}
else
{
l = mid + 1;
}
}
for (int i = s; i < r; ++i)
{
yield return new Suffix(this, this[i]);
}
}
private class Suffix : ISuffix
{
private readonly SuffixArrayFromSuffixTree owner;
public int StartIndex { get; private set; }
public string Value => owner.text.Substring(StartIndex) + SuffixTree.TerminationCharacter;
public Suffix(SuffixArrayFromSuffixTree owner, int startIndex)
{
this.owner = owner;
this.StartIndex = startIndex;
}
}
}
Find Strings Java Solution
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.TreeSet;
public class Solution {
static TreeSet<String>t;
public static void main(String[] args) {
try{
BufferedReader br=new BufferedReader(new InputStreamReader(System.in));
t=new TreeSet<String>();
int n=Integer.parseInt(br.readLine());
for(int i=0;i<n;i++){
String s=br.readLine();
for(int j=0;j<s.length();j++){
t.add(s.substring(j,s.length()));
}
}
Object [] suffix1=(t.toArray());
String suffix[]=new String[suffix1.length];
for(int l=0;l<suffix.length;l++){
suffix[l]=(String)suffix1[l];
//System.out.println(suffix[l]);
}
int len[]=new int[suffix.length];
int lcp[]=new int[suffix.length];
len[0]=suffix[0].toString().length();
lcp[0]=0;
for(int j=1;j<suffix.length;j++){
int count=0;
try{
while(true){
if(suffix[j-1].charAt(count)==suffix[j].charAt(count)){
count++;
}
else{
break;
}
}}catch(StringIndexOutOfBoundsException e){}
len[j]=suffix[j].length()-count;
lcp[j]=count;
}
int q=Integer.parseInt(br.readLine());
for(int i=0;i<q;i++){
int a=Integer.parseInt(br.readLine());
int a1=0;
int j=0;
int count=0;
try{
while(j<a){
a1=j;
j=j+len[count++];
}
count--;
System.out.println(suffix[count].substring(0, lcp[count]+a-a1));
}catch(ArrayIndexOutOfBoundsException e){
System.out.println("INVALID");
}
}
}catch(IOException e){
System.out.println(e);
}
}
}
Find Strings JavaScript Solution
function processData (input) {
var inputs = input.replace(/\r/g, "").split("\n");
var n = parseInt(inputs.shift());
var strings = [];
while (strings.length < n)
strings.push(inputs.shift());
var tests = parseInt(inputs.shift());
var grouped = {}, count = 0;
for (var s = 0; s < strings.length; s++) {
for (var i = 0; i < strings[s].length; i++) {
var c = strings[s][i];
if (grouped[c] == null)
grouped[c] = {count: 0, instances: []};
grouped[c].instances.push({s: s, i: i, j: 0});
}
}
for (var c in grouped) {
grouped[c].instances.sort(function (a, b) {
for (var ai = a.i, bi = b.i; ai < strings[a.s].length && bi < strings[b.s].length; ai++, bi++) {
if (strings[a.s][ai] < strings[b.s][bi]) return -1;
if (strings[a.s][ai] > strings[b.s][bi]) return 1;
}
if ((strings[a.s].length - a.i) < (strings[b.s].length - b.i)) return -1;
if ((strings[a.s].length - a.i) > (strings[b.s].length - b.i)) return 1;
return 0;
});
for (var i = 1; i < grouped[c].instances.length; i++) {
var a = grouped[c].instances[i];
var b = grouped[c].instances[i-1];
for (var ai = a.i, bi = b.i; ai < strings[a.s].length && bi < strings[b.s].length; ai++, bi++)
if (strings[a.s][ai] == strings[b.s][bi]) a.j++;
else break;
}
for (var i = 0; i < grouped[c].instances.length; i++) {
var a = grouped[c].instances[i];
a.length = strings[a.s].length - a.i;
a.count = a.length - a.j;
a.j++;
grouped[c].count += a.count
}
count += grouped[c].count;
}
var chars = Object.keys(grouped).sort();
for (var i = 0; i < tests && inputs.length; i++) {
var query = parseInt(inputs.shift());
if (query > count) {
process.stdout.write("INVALID" + "\n");
} else {
var group = null, instance = null, cumulative = 0;
for (var j = 0; j < chars.length; j++) {
group = grouped[chars[j]];
cumulative += group.count;
if (cumulative >= query) {
cumulative -= group.count;
break;
}
}
for (var j = 0; j < group.instances.length; j++) {
instance = group.instances[j];
cumulative += instance.count;
if (cumulative >= query) {
cumulative -= instance.count;
break;
}
}
var string = strings[instance.s].substr(instance.i, instance.j + query - cumulative - 1);
process.stdout.write(string + "\n");
}
}
}
process.stdin.resume();
process.stdin.setEncoding("ascii");
_input = "";
process.stdin.on("data", function (input) {
_input += input;
});
process.stdin.on("end", function () {
processData(_input);
});
Find Strings Python Solution
from operator import attrgetter
def lcp(s, t):
length = min(len(s), len(t))
for i in range(length):
if s[i] != t[i]:
return i
return length
class Suffix(object):
def __init__(self, s):
self.t = s
self.start = 0
self.c = -1
def count(self, s):
if s:
self.start = lcp(self.t, s.t)
self.c = len(self.t) - self.start
class SuffixArray(object):
def __init__(self):
self.suffixes = []
def add(self, s):
for i in range(len(s)):
self.suffixes.append(Suffix(s[i:]))
def select(self, i):
for j in range(len(self.suffixes)):
suffix = self.suffixes[j]
if suffix.c == -1:
if j == 0:
suffix.count(None)
else:
suffix.count(self.suffixes[j - 1])
if i <= suffix.c:
return suffix.t[:suffix.start + i]
else:
i = i - suffix.c
return 'INVALID'
def makeSuffixArray():
sa = SuffixArray()
n = int(input())
for i in range(n):
w = input()
sa.add(w)
sa.suffixes.sort(key = attrgetter('t'))
return sa
def selectOutput():
sa = makeSuffixArray()
q = int(input())
for i in range(q):
k = int(input())
print(sa.select(k))
selectOutput()