HackerRank Counting Road Networks Solution

In this post, we will solve HackerRank Counting Road Networks Problem Solution.

Lukas is a Civil Engineer who loves designing road networks to connect n cities numbered from 1 to n. He can build any number of bidirectional roads as long as the resultant network satisfies these constraints:

It must be possible to reach any city from any other city by traveling along the network of roads.
No two roads can directly connect the same two cities.
A road cannot directly connect a city to itself.
In other words, the roads and cities must form a simple connected labeled graph.
You must answer q queries, where each query consists of some n denoting the number of cities Lukas wants to design a bidirectional network of roads for. For each query, find and print the number of ways he can build roads connecting n cities on a new line; as the number of ways can be quite large, print it modulo 663224321.
Input Format
The first line contains an integer, q, denoting the number of queries. Each of the q subsequent lines contains an integer denoting the value of n for a query.

Output Format

For each of the q queries, print the number of ways Lukas can build a network of bidirectional roads connecting n cities, modulo 663224321, on a new line.

Sample Input 0

Sample Output 0

1
4
201986643

Explanation 0
We answer the first two queries like this:

When n = 1, the only option satisfying Lukas’ three constraints is to not build any roads at all. Thus, we print the result of 1 mod 663224321 = 1 on a new line.
When n = 3, there are four ways for Lukas to build roads that satisfy his three constraints:
Thus, we print the result of 4 mod 663224321 = 4 on a new line..

HackerRank Counting Road Networks Problem Solution

Counting Road Networks C++ Solution

#include "bits/stdc++.h"
using namespace std;
#define rep(i,n) for(int (i)=0;(i)<(int)(n);++(i))
#define rer(i,l,u) for(int (i)=(int)(l);(i)<=(int)(u);++(i))
#define reu(i,l,u) for(int (i)=(int)(l);(i)<(int)(u);++(i))
static const int INF = 0x3f3f3f3f; static const long long INFL = 0x3f3f3f3f3f3f3f3fLL;
typedef vector<int> vi; typedef pair<int, int> pii; typedef vector<pair<int, int> > vpii; typedef long long ll;
template<typename T, typename U> static void amin(T &x, U y) { if (y < x) x = y; }
template<typename T, typename U> static void amax(T &x, U y) { if (x < y) x = y; }


#pragma GCC optimize ("O3")
#pragma GCC target ("sse4")

namespace uint_util {
	template<typename T> struct Utils {};

	template<> struct Utils<uint32_t> {
		static void umul_full(uint32_t a, uint32_t b, uint32_t *lo, uint32_t *hi) {
			const uint64_t c = (uint64_t)a * b;
			*lo = (uint32_t)c;
			*hi = (uint32_t)(c >> 32);
		}
		static  uint32_t umul_hi(uint32_t a, uint32_t b) {
			return (uint32_t)((uint64_t)a * b >> 32);
		}
		static uint32_t mulmod_invert(uint32_t b, uint32_t n) {
			return ((uint64_t)b << 32) / n;
		}

		static uint32_t umul_lo(uint32_t a, uint32_t b) {
			return a * b;
		}
		static uint32_t mulmod_precalculated(uint32_t a, uint32_t b, uint32_t n, uint32_t bninv) {
			const auto q = umul_hi(a, bninv);
			uint32_t r = a * b - q * n;
			if (r >= n) r -= n;
			return r;
		}

		static uint32_t invert_twoadic(uint32_t x) {
			uint32_t i = x, p;
			do {
				p = i * x;
				i *= 2 - p;
			} while (p != 1);
			return i;
		}
	};

}

namespace modnum {

	template<typename NumType> struct ModNumTypes {
		using Util = uint_util::Utils<NumType>;

		template<int Lazy> struct LazyModNum;

		//x < Lazy * P
		template<int Lazy>
		struct LazyModNum {
			NumType x;
			LazyModNum() : x() {}
			template<int L>
			explicit LazyModNum(LazyModNum<L> t) : x(t.x) { static_assert(L <= Lazy, "invalid conversion"); }

			static LazyModNum raw(NumType x) {
				LazyModNum r; r.x = x;
				return r;
			}

			template<int L>
			static LazyModNum *coerceArray(LazyModNum<L> *a) { return reinterpret_cast<LazyModNum*>(a); }

			bool operator==(LazyModNum that) const {
				static_assert(Lazy == 1, "cannot compare");
				return x == that.x;
			}
		};

		typedef LazyModNum<1> ModNum;

		class ModInfo {
		public:
			enum {
				MAX_ROOT_ORDER = 23
			};

		private:
			NumType P, P2;
			ModNum _one;
			NumType _twoadic_inverse;
			NumType _order;
			NumType _one_P_inv;	//floor(W * (W rem P) / P)

			bool _support_fft;
			ModNum _roots[MAX_ROOT_ORDER + 1], _inv_roots[MAX_ROOT_ORDER + 1];
			ModNum _inv_two_powers[MAX_ROOT_ORDER + 1];

		public:
			NumType getP() const { return P; }
			NumType get_twoadic_inverse() const { return _twoadic_inverse; }

			ModNum one() const { return _one; }

			ModNum to_alt(NumType x) const {
				return ModNum::raw(Util::mulmod_precalculated(x, _one.x, P, _one_P_inv));
			}
			NumType from_alt(ModNum x) const {
				return _reduce(x.x, 0);
			}

			bool support_fft() const { return _support_fft; }

			ModNum root(int n) const {
				assert(support_fft());
				if (n > 0) {
					assert(n <= MAX_ROOT_ORDER);
					return _roots[n];
				} else if (n < 0) {
					assert(n >= -MAX_ROOT_ORDER);
					return _inv_roots[-n];
				} else {
					return one();
				}
			}

			ModNum inv_two_power(int n) const {
				assert(support_fft());
				assert(0 <= n && n <= MAX_ROOT_ORDER);
				return _inv_two_powers[n];
			}

			ModNum add(ModNum a, ModNum b) const {
				auto c = a.x + b.x;
				if (c >= P) c -= P;
				return ModNum::raw(c);
			}

			ModNum sub(ModNum a, ModNum b) const {
				auto c = a.x + (P - b.x);
				if (c >= P) c -= P;
				return ModNum::raw(c);
			}

			LazyModNum<4> add_lazy(LazyModNum<2> a, LazyModNum<2> b) const {
				return LazyModNum<4>::raw(a.x + b.x);
			}
			LazyModNum<4> sub_lazy(LazyModNum<2> a, LazyModNum<2> b) const {
				return LazyModNum<4>::raw(a.x + (P2 - b.x));
			}

			ModNum mul(ModNum a, ModNum b) const {
				NumType lo, hi;
				Util::umul_full(a.x, b.x, &lo, &hi);
				return ModNum::raw(_reduce(lo, hi));
			}
			ModNum sqr(ModNum a) const {
				return mul(a, a);
			}

			template<int LA, int LB>
			LazyModNum<2> mul_lazy(LazyModNum<LA> a, LazyModNum<LB> b) const {
				static_assert(LA + LB <= 5, "too lazy");
				NumType lo, hi;
				Util::umul_full(a.x, b.x, &lo, &hi);
				return LazyModNum<2>::raw(_reduce_lazy(lo, hi));
			}

			ModNum pow(ModNum a, NumType k) const {
				LazyModNum<2> base{ a }, res{ one() };
				while (1) {
					if (k & 1) res = mul_lazy(res, base);
					if (k >>= 1) base = mul_lazy(base, base);
					else break;
				}
				return lazy_reduce_1(res);
			}

			ModNum inverse(ModNum a) const {
				return pow(a, _order - 1);
			}

			//a < 2P, res < P
			ModNum lazy_reduce_1(LazyModNum<2> a) const {
				NumType x = a.x;
				if (x >= P) x -= P;
				return ModNum::raw(x);
			}
			//a < 4P, res < 2P
			LazyModNum<2> lazy_reduce_2(LazyModNum<4> a) const {
				NumType x = a.x;
				if (x >= P2) x -= P2;
				return LazyModNum<2>::raw(x);
			}

		private:
			NumType _reduce(NumType lo, NumType hi) const {
				const auto q = Util::umul_lo(lo, _twoadic_inverse);
				const auto h = Util::umul_hi(q, P);
				NumType t = hi + P - h;
				if (t >= P) t -= P;
				return t;
			}

			NumType _reduce_lazy(NumType lo, NumType hi) const {
				const auto q = Util::umul_lo(lo, _twoadic_inverse);
				const auto h = Util::umul_hi(q, P);
				return hi + P - h;
			}

		public:
			static ModInfo make(NumType P, NumType order = NumType(-1)) {
				ModInfo res;

				res.P = P;
				res.P2 = P * 2;
				res._one.x = ~Util::umul_lo(Util::mulmod_invert(1, P), P) + 1;
				res._order = order == NumType(-1) ? P - 1 : order;
				res._twoadic_inverse = Util::invert_twoadic(P);

				res._one_P_inv = Util::mulmod_invert(res._one.x, P);

				res._support_fft = false;

				assert(res.mul(res.one(), res.one()) == res.one());

				return res;
			}

			static ModInfo make_support_fft(NumType P, NumType order, NumType original_root, int valuation) {
				ModInfo res = make(P, order);
				_compute_fft_info(res, original_root, valuation);
				return res;
			}

		private:
			static void _compute_fft_info(ModInfo &res, NumType original_root, int valuation) {
				assert(res.P <= 1ULL << (sizeof(NumType) * 8 - 2));
				assert(valuation >= MAX_ROOT_ORDER);

				res._support_fft = true;

				ModNum max_root = res.to_alt(original_root);
				for (int i = valuation; i > MAX_ROOT_ORDER; -- i)
					max_root = res.sqr(max_root);

				res._roots[MAX_ROOT_ORDER] = max_root;
				for (int i = MAX_ROOT_ORDER - 1; i >= 0; -- i)
					res._roots[i] = res.sqr(res._roots[i + 1]);

				res._inv_roots[MAX_ROOT_ORDER] = res.inverse(max_root);
				for (int i = MAX_ROOT_ORDER - 1; i >= 0; -- i)
					res._inv_roots[i] = res.sqr(res._inv_roots[i + 1]);

				res._inv_two_powers[0] = res.one();
				res._inv_two_powers[1] = res.inverse(res.add(res.one(), res.one()));
				for (int i = 1; i < MAX_ROOT_ORDER; ++ i)
					res._inv_two_powers[i] = res.mul(res._inv_two_powers[1], res._inv_two_powers[i - 1]);

				assert(res.mul(res._roots[1], res._inv_roots[1]) == res.one());
				assert(res.root(0) == res.one());
				assert(!(res.root(1) == res.one()));
			}
		};
	};

}

namespace fft {

	using namespace modnum;

	using NumType = uint32_t;

	using ModNumType = ModNumTypes<NumType>;
	template<int Lazy>
	using LazyModNum = ModNumType::LazyModNum<Lazy>;
	using ModNum = ModNumType::ModNum;
	using ModInfo = ModNumType::ModInfo;

	using ModNumType32 = ModNumTypes<uint32_t>;
	using ModNum32 = ModNumType32::ModNum;
	using ModInfo32 = ModNumType32::ModInfo;

	inline __m128i mod_lazy_reduce_2_sse2(const __m128i &a, const __m128i &p2, const __m128i &signbit) {
		const auto mask = _mm_cmpgt_epi32(_mm_xor_si128(p2, signbit), _mm_xor_si128(a, signbit));
		const auto sub = _mm_andnot_si128(mask, p2);
		return _mm_sub_epi32(a, sub);
	}

	inline __m128i mod_reduce_lazy_sse2(const __m128i &a, const __m128i &p, const __m128i &twoadic_inverse) {
		const auto q = _mm_mul_epu32(a, twoadic_inverse);
		const auto h = _mm_shuffle_epi32(_mm_mul_epu32(q, p), _MM_SHUFFLE(3, 3, 1, 1));
		return _mm_add_epi32(a, _mm_sub_epi32(p, h));
	}

	inline __m128i mod_mul_lazy_sse2(const __m128i &a, const __m128i &b, const __m128i &p, const __m128i &twoadic_inverse) {
		const auto a02 = _mm_shuffle_epi32(a, _MM_SHUFFLE(2, 2, 0, 0));
		const auto a13 = _mm_shuffle_epi32(a, _MM_SHUFFLE(3, 3, 1, 1));
		const auto b02 = _mm_shuffle_epi32(b, _MM_SHUFFLE(2, 2, 0, 0));
		const auto b13 = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));

		const auto prod02 = _mm_mul_epu32(a02, b02);
		const auto prod13 = _mm_mul_epu32(a13, b13);

		const auto res02 = mod_reduce_lazy_sse2(prod02, p, twoadic_inverse);
		const auto res13 = mod_reduce_lazy_sse2(prod13, p, twoadic_inverse);

		const auto shuffled02 = _mm_shuffle_epi32(res02, _MM_SHUFFLE(0, 0, 3, 1));
		const auto shuffled13 = _mm_shuffle_epi32(res13, _MM_SHUFFLE(0, 0, 3, 1));

		return _mm_unpacklo_epi32(shuffled02, shuffled13);
	}

	inline __m128i mod_mul_sse2(const __m128i &a, const __m128i &b, const __m128i &p, const __m128i &twoadic_inverse) {
		__m128i t = mod_mul_lazy_sse2(a, b, p, twoadic_inverse);
		const auto mask = _mm_cmpgt_epi32(p, t);	//signed compare
		const auto sub = _mm_andnot_si128(mask, p);
		return _mm_sub_epi32(t, sub);
	}

	inline __m128i mod_add_lazy_sse2(const __m128i &a, const __m128i &b) {
		return _mm_add_epi32(a, b);
	}
	inline __m128i mod_sub_lazy_sse2(const __m128i &a, const __m128i &b, const __m128i &p2) {
		return _mm_add_epi32(a, _mm_sub_epi32(p2, b));
	}

	void ntt_dit_lazy_core_sse2(LazyModNum<2> *f_inout, int n, int sign, const ModInfo &mod) {
		LazyModNum<4> * const f = LazyModNum<4>::coerceArray(f_inout);

		int N = 1 << n;

		if (n <= 1) {
			if (n == 0)
				return;

			const auto a = f_inout[0];
			const auto b = f_inout[1];

			f_inout[0] = mod.lazy_reduce_2(mod.add_lazy(a, b));
			f_inout[1] = mod.lazy_reduce_2(mod.sub_lazy(a, b));
			return;
		}

		if (n & 1) {
			for (int i = 0; i < N; i += 2) {
				const auto a = f_inout[i + 0];
				const auto b = f_inout[i + 1];

				f[i + 0] = mod.add_lazy(a, b);
				f[i + 1] = mod.sub_lazy(a, b);
			}
		}

		if ((n & 1) == 0) {
			const auto imag = mod.root(2 * sign);
			for (int i = 0; i < N; i += 4) {
				const auto a0 = f_inout[i + 0];
				const auto a2 = f_inout[i + 1];
				const auto a1 = f_inout[i + 2];
				const auto a3 = f_inout[i + 3];

				const auto t02 = mod.lazy_reduce_2(mod.add_lazy(a0, a2));
				const auto t13 = mod.lazy_reduce_2(mod.add_lazy(a1, a3));

				f[i + 0] = mod.add_lazy(t02, t13);
				f[i + 2] = mod.sub_lazy(t02, t13);

				const auto u02 = mod.lazy_reduce_2(mod.sub_lazy(a0, a2));
				const auto u13 = mod.mul_lazy(mod.sub_lazy(a1, a3), imag);

				f[i + 1] = mod.add_lazy(u02, u13);
				f[i + 3] = mod.sub_lazy(u02, u13);
			}
		} else {
			const auto imag = mod.root(2 * sign);
			const auto omega = mod.root(3 * sign);

			for (int i = 0; i < N; i += 8) {
				const auto a0 = mod.lazy_reduce_2(f[i + 0]);
				const auto a2 = mod.lazy_reduce_2(f[i + 2]);
				const auto a1 = mod.lazy_reduce_2(f[i + 4]);
				const auto a3 = mod.lazy_reduce_2(f[i + 6]);

				const auto t02 = mod.lazy_reduce_2(mod.add_lazy(a0, a2));
				const auto t13 = mod.lazy_reduce_2(mod.add_lazy(a1, a3));

				f[i + 0] = mod.add_lazy(t02, t13);
				f[i + 4] = mod.sub_lazy(t02, t13);

				const auto u02 = mod.lazy_reduce_2(mod.sub_lazy(a0, a2));
				const auto u13 = mod.mul_lazy(mod.sub_lazy(a1, a3), imag);

				f[i + 2] = mod.add_lazy(u02, u13);
				f[i + 6] = mod.sub_lazy(u02, u13);
			}

			ModNum w = omega, w2 = imag, w3 = mod.mul(w2, w);

			for (int i = 1; i < N; i += 8) {
				const auto a0 = mod.lazy_reduce_2(f[i + 0]);
				const auto a2 = mod.mul_lazy(f[i + 2], w2);
				const auto a1 = mod.mul_lazy(f[i + 4], w);
				const auto a3 = mod.mul_lazy(f[i + 6], w3);

				const auto t02 = mod.lazy_reduce_2(mod.add_lazy(a0, a2));
				const auto t13 = mod.lazy_reduce_2(mod.add_lazy(a1, a3));

				f[i + 0] = mod.add_lazy(t02, t13);
				f[i + 4] = mod.sub_lazy(t02, t13);

				const auto u02 = mod.lazy_reduce_2(mod.sub_lazy(a0, a2));
				const auto u13 = mod.mul_lazy(mod.sub_lazy(a1, a3), imag);

				f[i + 2] = mod.add_lazy(u02, u13);
				f[i + 6] = mod.sub_lazy(u02, u13);
			}
		}

		for (int m = 4 + (n & 1); m <= n; m += 2) {
			int M = 1 << m, M_4 = M >> 2;
			const auto o = mod.root(m * sign), o2 = mod.root((m - 1) * sign), o4 = mod.root((m - 2) * sign);

			const auto p = _mm_set1_epi32(mod.getP());
			const auto p2 = _mm_set1_epi32(mod.getP() * 2);
			const auto twoadic_inverse = _mm_set1_epi32(mod.get_twoadic_inverse());
			const auto imag = _mm_set1_epi32(mod.root(2 * sign).x);
			const auto omega = _mm_set1_epi32(o4.x);
			const auto signbit = _mm_set1_epi32((int)(1U << 31));

			__m128i w = _mm_set_epi32(mod.mul(o, o2).x, o2.x, o.x, mod.one().x);

			for (int j = 0; j < M_4; j += 4) {
				const auto w2 = mod_mul_sse2(w, w, p, twoadic_inverse);
				const auto w3 = mod_mul_sse2(w2, w, p, twoadic_inverse);

				for (int i = j; i < N; i += M) {
					const auto f0 = _mm_loadu_si128(reinterpret_cast<__m128i*>(f + i + M_4 * 0));
					const auto f1 = _mm_loadu_si128(reinterpret_cast<__m128i*>(f + i + M_4 * 1));
					const auto f2 = _mm_loadu_si128(reinterpret_cast<__m128i*>(f + i + M_4 * 2));
					const auto f3 = _mm_loadu_si128(reinterpret_cast<__m128i*>(f + i + M_4 * 3));

					const auto a0 = mod_lazy_reduce_2_sse2(f0, p2, signbit);
					const auto a2 = mod_mul_lazy_sse2(f1, w2, p, twoadic_inverse);
					const auto a1 = mod_mul_lazy_sse2(f2, w, p, twoadic_inverse);
					const auto a3 = mod_mul_lazy_sse2(f3, w3, p, twoadic_inverse);

					const auto t02 = mod_lazy_reduce_2_sse2(mod_add_lazy_sse2(a0, a2), p2, signbit);
					const auto t13 = mod_lazy_reduce_2_sse2(mod_add_lazy_sse2(a1, a3), p2, signbit);

					const auto r0 = mod_add_lazy_sse2(t02, t13);
					const auto r2 = mod_sub_lazy_sse2(t02, t13, p2);

					const auto u02 = mod_lazy_reduce_2_sse2(mod_sub_lazy_sse2(a0, a2, p2), p2, signbit);
					const auto u13 = mod_mul_lazy_sse2(mod_sub_lazy_sse2(a1, a3, p2), imag, p, twoadic_inverse);

					const auto r1 = mod_add_lazy_sse2(u02, u13);
					const auto r3 = mod_sub_lazy_sse2(u02, u13, p2);

					_mm_storeu_si128(reinterpret_cast<__m128i*>(f + i + M_4 * 0), r0);
					_mm_storeu_si128(reinterpret_cast<__m128i*>(f + i + M_4 * 1), r1);
					_mm_storeu_si128(reinterpret_cast<__m128i*>(f + i + M_4 * 2), r2);
					_mm_storeu_si128(reinterpret_cast<__m128i*>(f + i + M_4 * 3), r3);
				}

				w = mod_mul_sse2(w, omega, p, twoadic_inverse);
			}
		}

		for (int i = 0; i < N; ++ i)
			f_inout[i] = mod.lazy_reduce_2(f[i]);
	}

	void ntt_dit_lazy_core(LazyModNum<2> *f_inout, int n, int sign, const ModInfo &mod) {
		ntt_dit_lazy_core_sse2(f_inout, n, sign, mod);
	}

	template<typename T>
	void bit_reverse_permute(T *f, int n) {
		int N = 1 << n, N_2 = N >> 1, r = 0;
		for (int x = 1; x < N; ++ x) {
			int h = N_2;
			while (((r ^= h) & h) == 0) h >>= 1;
			if (r > x) swap(f[x], f[r]);
		}
	}

	void ntt_dit_lazy(LazyModNum<2> *f, int n, int sign, const ModInfo &mod) {
		bit_reverse_permute(f, n);
		ntt_dit_lazy_core(f, n, sign, mod);
	}

	template<int LF, int LG>
	void componentwise_product_lazy(LazyModNum<2> *res, const LazyModNum<LF> *f, const LazyModNum<LG> *g, int N, const ModInfo &mod) {
		for (int i = 0; i < N; ++ i)
			res[i] = mod.mul_lazy(f[i], g[i]);
	}

	void normalize_and_lazy_reduce(LazyModNum<2> *f, int n, const ModInfo &mod) {
		const auto f_out = ModNum::coerceArray(f);
		int N = 1 << n;
		ModNum inv = mod.inv_two_power(n);
		assert(mod.mul(inv, mod.to_alt(N)) == mod.one());
		for (int i = 0; i < N; ++ i)
			f_out[i] = mod.lazy_reduce_1(mod.mul_lazy(f[i], inv));
	}

	void convolute(ModNum *f_in, ModNum *g_in, int n, const ModInfo &mod) {
		assert(mod.support_fft());
		const auto f = LazyModNum<2>::coerceArray(f_in);
		const auto g = LazyModNum<2>::coerceArray(g_in);
		ntt_dit_lazy(f, n, +1, mod);
		ntt_dit_lazy(g, n, +1, mod);
		componentwise_product_lazy(f, f, g, 1 << n, mod);
		ntt_dit_lazy(f, n, -1, mod);
		normalize_and_lazy_reduce(f, n, mod);
	}

	void auto_convolute(ModNum *f_in, int n, const ModInfo &mod) {
		assert(mod.support_fft());
		const auto f = LazyModNum<2>::coerceArray(f_in);
		ntt_dit_lazy(f, n, +1, mod);
		componentwise_product_lazy(f, f, f, 1 << n, mod);
		ntt_dit_lazy(f, n, -1, mod);
		normalize_and_lazy_reduce(f, n, mod);
	}

	enum { MULTIPRIME_NUM = 3 };

	static const ModInfo fft_prime_mod0 = ModInfo::make_support_fft(998244353, -1, 31, 23);
	static const ModInfo fft_prime_mod1 = ModInfo::make_support_fft(897581057, -1, 45, 23);
	static const ModInfo fft_prime_mod2 = ModInfo::make_support_fft(880803841, -1, 211, 23);
	const ModInfo * const fft_prime_mods[MULTIPRIME_NUM] = { &fft_prime_mod0, &fft_prime_mod1, &fft_prime_mod2 };

	void multiprime_compose(ModNum32 *res, const ModInfo32 &mod_res, const ModNum *f[MULTIPRIME_NUM], int N, const ModInfo * const mods[MULTIPRIME_NUM]) {
		const auto f0 = f[0], f1 = f[1], f2 = f[2];
		const auto &mod0 = *mods[0], &mod1 = *mods[1], &mod2 = *mods[2];
		const auto P0 = mod0.getP(), P1 = mod1.getP(), P2 = mod2.getP();
		const auto P_res = mod_res.getP();
		const auto t1 = mod1.inverse(mod1.to_alt(P0));
		const auto t2 = mod2.inverse(mod2.to_alt((uint64_t)P0 * P1 % P2));
		const auto p01 = mod_res.to_alt((uint64_t)P0 * P1 % P_res);

		for (int i = 0; i < N; ++ i) {
			const auto a0 = mod0.from_alt(f0[i]), a1 = mod1.from_alt(f1[i]), a2 = mod2.from_alt(f2[i]);
			const auto d1 = mod1.sub(mod1.to_alt(a1), mod1.to_alt(a0));
			const auto h1 = mod1.from_alt(mod1.mul(d1, t1));
			const auto a01 = a0 + (uint64_t)P0 * h1;
			const auto d2 = mod2.sub(mod2.to_alt(a2), mod2.to_alt(a01 % P2));
			const auto h2 = mod2.from_alt(mod2.mul(d2, t2));
			res[i] = mod_res.add(mod_res.to_alt(a01 % P_res), mod_res.mul(mod_res.to_alt(h2 % P_res), p01));
		}
	}

	void multiprime_decompose(ModNum *res[MULTIPRIME_NUM], const ModNum32 *f, int N, const ModInfo32 &f_mod, const ModInfo * const mods[MULTIPRIME_NUM]) {
		for (int i = 0; i < N; ++ i) {
			const auto a = f_mod.from_alt(f[i]);
			for (int k = 0; k < MULTIPRIME_NUM; ++ k)
				res[k][i] = mods[k]->to_alt(a);
		}
	}

	void multiprime_convolute(ModNum32 *res, int resN, const ModNum32 *f, int fN, const ModNum32 *g, int gN, int n, const ModInfo32 &mod) {
		int N = 1 << n;
		assert(fN <= N && gN <= N && resN <= N);
		//implicit zero-fill
		unique_ptr<ModNum[]> workspace(new ModNum[N * MULTIPRIME_NUM * 2]);
		ModNum *fs[MULTIPRIME_NUM], *gs[MULTIPRIME_NUM];
		for (int k = 0; k < MULTIPRIME_NUM; ++ k) {
			fs[k] = workspace.get() + (k * 2 + 0) * N;
			gs[k] = workspace.get() + (k * 2 + 1) * N;
		}
		multiprime_decompose(fs, f, fN, mod, fft_prime_mods);
		multiprime_decompose(gs, g, gN, mod, fft_prime_mods);
		for (int k = 0; k < MULTIPRIME_NUM; ++ k)
			convolute(fs[k], gs[k], n, *fft_prime_mods[k]);
		multiprime_compose(res, mod, const_cast<const ModNum **>(fs), resN, fft_prime_mods);
	}

	void multiprime_auto_convolute(ModNum32 *res, int resN, const ModNum32 *f, int fN, int n, const ModInfo32 &mod) {
		int N = 1 << n;
		assert(fN <= N && resN <= N);
		unique_ptr<ModNum[]> workspace(new ModNum[N * MULTIPRIME_NUM]);
		ModNum *fs[MULTIPRIME_NUM];
		for (int k = 0; k < MULTIPRIME_NUM; ++ k)
			fs[k] = workspace.get() + k * N;
		multiprime_decompose(fs, f, fN, mod, fft_prime_mods);
		for (int k = 0; k < MULTIPRIME_NUM; ++ k)
			auto_convolute(fs[k], n, *fft_prime_mods[k]);
		multiprime_compose(res, mod, const_cast<const ModNum **>(fs), resN, fft_prime_mods);
	}

}

struct ModInt {
	using NumType = uint32_t;
	using ModNumType = modnum::ModNumTypes<NumType>;
	using ModNum = ModNumType::ModNum;
	using ModInfo = ModNumType::ModInfo;

public:
	ModNum x;

	ModInt() : x() {}
	ModInt(NumType num) : x(mod.to_alt(num)) {}
	ModInt(int num) : x(mod.to_alt(num >= 0 ? num : mod.getP() + num % (int)mod.getP())) {}

	NumType get() const { return mod.from_alt(x); }

	static ModInt raw(ModNum x) { ModInt r; r.x = x; return r; }
	static ModInt one() { return raw(mod.one()); }

	ModInt operator+(ModInt that) const { return raw(mod.add(x, that.x)); }
	ModInt &operator+=(ModInt that) { return *this = *this + that; }

	ModInt operator-(ModInt that) const { return raw(mod.sub(x, that.x)); }
	ModInt &operator-=(ModInt that) { return *this = *this - that; }

	ModInt operator-() const { return raw(mod.sub(ModNum(), x)); }

	ModInt operator*(ModInt that) const { return raw(mod.mul(x, that.x)); }
	ModInt &operator*=(ModInt that) { return *this = *this * that; }

	ModInt inverse() const { return raw(mod.inverse(x)); }
	ModInt operator/(ModInt that) const { return *this * that.inverse(); }
	ModInt &operator/=(ModInt that) { return *this = *this / that.inverse(); }

	bool operator==(ModInt that) const { return x == that.x; }
	bool operator!=(ModInt that) const { return !(*this == that); }

private:
	static ModInfo mod;

public:
	static const ModInfo &get_mod_info() { return mod; }
	static NumType getMod() { return mod.getP(); }

	static void set_mod(NumType P, NumType order = -1) {
		mod = ModInfo::make(P, order);
	}
};

ModInt::ModInfo ModInt::mod;
typedef ModInt mint;

namespace mod_polynomial {

	struct Polynomial {
		typedef mint R;
		static R ZeroR() { return R(); }
		static R OneR() { return R::one(); }
		static bool IsZeroR(R r) { return r == ZeroR(); }

		struct NumberTable {
			std::vector<R> natural_numbers;
			std::vector<R> inverse_numbers;
			std::vector<R> factorials;
			std::vector<R> inverse_factorials;

			int size() const { return (int)natural_numbers.size(); }
		};

		std::vector<R> coeffs;

		Polynomial() {}
		explicit Polynomial(R c0) : coeffs(1, c0) {}
		explicit Polynomial(R c0, R c1) : coeffs(2) { coeffs[0] = c0, coeffs[1] = c1; }
		template<typename It> Polynomial(It be, It en) : coeffs(be, en) {}

		static Polynomial Zero() { return Polynomial(); }
		static Polynomial One() { return Polynomial(OneR()); }
		static Polynomial X() { return Polynomial(ZeroR(), OneR()); }

		void resize(int n) { coeffs.resize(n); }
		void clear() { coeffs.clear(); }

		R *data() { return coeffs.empty() ? NULL : &coeffs[0]; }
		const R *data() const { return coeffs.empty() ? NULL : &coeffs[0]; }

		int size() const { return static_cast<int>(coeffs.size()); }
		bool empty() const { return coeffs.empty(); }
		int degree() const { return size() - 1; }

		bool normalized() const { return coeffs.empty() || coeffs.back() != ZeroR(); }
		bool monic() const { return !coeffs.empty() && coeffs.back() == OneR(); }

		R get(int i) const { return 0 <= i && i < size() ? coeffs[i] : ZeroR(); }

		void set(int i, R x) {
			if (size() <= i)
				resize(i + 1);
			coeffs[i] = x;
		}

		void normalize() { while (!empty() && IsZeroR(coeffs.back())) coeffs.pop_back(); }

		R evaluate(R x) const {
			if (empty()) return R();
			R r = coeffs.back();
			for (int i = size() - 2; i >= 0; -- i) {
				r *= x;
				r += coeffs[i];
			}
			return r;
		}

		Polynomial &operator+=(const Polynomial &that) {
			int m = size(), n = that.size();
			if (m < n) resize(n);
			_add(data(), that.data(), n);
			return *this;
		}
		Polynomial operator+(const Polynomial &that) const {
			return Polynomial(*this) += that;
		}

		Polynomial &operator-=(const Polynomial &that) {
			int m = size(), n = that.size();
			if (m < n) resize(n);
			_subtract(data(), that.data(), n);
			return *this;
		}
		Polynomial operator-(const Polynomial &that) const {
			return Polynomial(*this) -= that;
		}

		Polynomial &operator*=(R r) {
			_multiply_1(data(), size(), r);
			return *this;
		}
		Polynomial operator*(R r) const {
			Polynomial res;
			res.resize(size());
			_multiply_1(res.data(), data(), size(), r);
			return res;
		}

		Polynomial operator*(const Polynomial &that) const {
			Polynomial r;
			multiply(r, *this, that);
			return r;
		}

		Polynomial &operator*=(const Polynomial &that) {
			multiply(*this, *this, that);
			return *this;
		}

		static void multiply(Polynomial &res, const Polynomial &p, const Polynomial &q) {
			int pn = p.size(), qn = q.size();

			if (pn < qn)
				return multiply(res, q, p);

			if (&res == &p || &res == &q) {
				Polynomial tmp;
				multiply(tmp, p, q);
				res = tmp;
				return;
			}

			if (qn == 0) {
				res.coeffs.clear();
			} else {
				res.resize(pn + qn - 1);
				_multiply_select_method(res.data(), p.data(), pn, q.data(), qn);
			}
		}

		Polynomial operator-() const {
			Polynomial res;
			res.resize(size());
			_negate(res.data(), data(), size());
			return res;
		}

		Polynomial precomputeRevInverse(int n) const {
			Polynomial res;
			res.resize(n);
			_precompute_reverse_inverse(res.data(), n, data(), size());
			return res;
		}

		static void divideRemainderPrecomputedRevInverse(Polynomial &quot, Polynomial &rem, const Polynomial &p, const Polynomial &q, const Polynomial &inv) {
			assert(&quot != &p && &quot != &q && &quot != &inv);
			int pn = p.size(), qn = q.size();
			assert(inv.size() >= pn - qn + 1);
			quot.resize(std::max(0, pn - qn + 1));
			rem.resize(qn - 1);
			_divide_remainder_precomputed_inverse(quot.data(), rem.data(), p.data(), pn, q.data(), qn, inv.data());
			quot.normalize();
			rem.normalize();
		}

		Polynomial computeRemainderPrecomputedRevInverse(const Polynomial &q, const Polynomial &inv) const {
			Polynomial quot, rem;
			divideRemainderPrecomputedRevInverse(quot, rem, *this, q, inv);
			return rem;
		}

		Polynomial powerMod(long long K, const Polynomial &q) const {
			int qn = q.size();
			assert(K >= 0 && qn > 0);
			assert(q.monic());
			if (qn == 1) return Polynomial();
			if (K == 0) return One();
			Polynomial inv = q.precomputeRevInverse(std::max(size() - qn + 1, qn));
			Polynomial p = this->computeRemainderPrecomputedRevInverse(q, inv);
			int l = 0;
			while ((K >> l) > 1) ++ l;
			Polynomial res = p;
			for (-- l; l >= 0; -- l) {
				res *= res;
				res = res.computeRemainderPrecomputedRevInverse(q, inv);
				if (K >> l & 1) {
					res *= p;
					res = res.computeRemainderPrecomputedRevInverse(q, inv);
				}
			}
			return res;
		}

		Polynomial reverse(int n) {
			assert(size() <= n);
			Polynomial r; r.resize(n);
			for (int i = 0; i < n; ++ i)
				r.set(n - 1 - i, get(i));
			return r;
		}

		static void clearNumberTable() {
			_numberTable = NumberTable();
		}

		static const NumberTable &getNumberTable(int size) {
			extendNumberTable(size);
			return _numberTable;
		}

		static void extendNumberTable(int size) {
			int old_size = _numberTable.size();
			if (old_size >= size)
				return;
			if (old_size * 2 > size)
				size = old_size * 2;
			NumberTable &nt = _numberTable;
			nt.natural_numbers.resize(size);
			nt.inverse_numbers.resize(size);
			nt.factorials.resize(size);
			nt.inverse_factorials.resize(size);
			if (old_size == 0) {
				nt.natural_numbers[0] = ZeroR();
				nt.inverse_numbers[0] = ZeroR();
				nt.factorials[0] = OneR();
				nt.inverse_factorials[0] = OneR();
				++ old_size;
			}
			for (int n = old_size; n < size; ++ n) {
				nt.natural_numbers[n] = nt.natural_numbers[n - 1] + OneR();
				if (IsZeroR(nt.natural_numbers[n])) {
					std::cerr << "No inverse of zero: " << n << std::endl;
					std::abort();
				}
				nt.factorials[n] = nt.factorials[n - 1] * nt.natural_numbers[n];
			}
			nt.inverse_factorials[size - 1] = nt.factorials[size - 1].inverse();
			for (int n = size - 2; n >= old_size; -- n)
				nt.inverse_factorials[n] = nt.inverse_factorials[n + 1] * nt.natural_numbers[n + 1];
			for (int n = old_size; n < size; ++ n)
				nt.inverse_numbers[n] = nt.inverse_factorials[n] * nt.factorials[n - 1];
		}

		Polynomial derivative() const {
			if (empty()) return *this;
			Polynomial res;
			res.resize(size() - 1);
			_derivative(res.data(), data(), size(), getNumberTable(size()).natural_numbers.data());
			return res;
		}

		Polynomial integrate() const {
			Polynomial res;
			res.resize(size() + 1);
			_integral(res.data(), data(), size(), getNumberTable(size() + 1).inverse_numbers.data());
			return res;
		}

		Polynomial inverse(int n) const {
			Polynomial res;
			res.resize(n);
			_inverse_power_series(res.data(), n, data(), size());
			return res;
		}

		Polynomial logarithm(int n) const {
			Polynomial res;
			res.resize(n);
			const NumberTable &nt = getNumberTable(std::max(size(), n));
			_log_power_series(res.data(), n, data(), size(), nt.natural_numbers.data(), nt.inverse_numbers.data());
			return res;
		}

		Polynomial exponential(int n) const {
			Polynomial res;
			res.resize(n);
			const NumberTable &nt = getNumberTable(std::max(size(), n));
			_exp_power_series(res.data(), n, data(), size(), nt.natural_numbers.data(), nt.inverse_numbers.data());
			return res;
		}

		static int MULTIPRIME_FFT_THRESHOLD;

	private:
		static NumberTable _numberTable;

		class WorkSpaceStack;

		static void _fill_zero(R *res, int n);
		static void _copy(R *res, const R *p, int n);
		static void _negate(R *res, const R *p, int n);

		static void _add(R *p, const R *q, int n);
		static void _add(R *res, const R *p, int pn, const R *q, int qn);
		static void _subtract(R *p, const R *q, int n);
		static void _subtract(R *res, const R *p, int pn, const R *q, int qn);

		static void _multiply_select_method(R *res, const R *p, int pn, const R *q, int qn);
		static void _square_select_method(R *res, const R *p, int pn);

		static void _multiply_1(R *p, const R *q, int n, R c0);
		static void _multiply_1(R *p, int n, R c0);

		static void _multiply_power_of_two(R *res, const R *p, int n, int k);
		static void _divide_power_of_two(R *res, const R *p, int n, int k);

		static void _schoolbook_multiplication(R *res, const R *p, int pn, const R *q, int qn);

		static void _multiprime_fft(R *res, const R *p, int pn, const R *q, int qn);

		static void _reverse(R *res, const R *p, int pn);
		static void _inverse_power_series(R *res, int resn, const R *p, int pn);
		static void _precompute_reverse_inverse(R *res, int resn, const R *p, int pn);
		static void _divide_precomputed_inverse(R *res, int resn, const R *revp, int pn, const R *inv);
		static void _divide_remainder_precomputed_inverse(R *quot, R *rem, const R *p, int pn, const R *q, int qn, const R *inv);

		static void _derivative(R *res, const R *p, int pn, const R *natural_numbers);
		static void _integral(R *res, const R *p, int pn, const R *inverse_numbers);

		static void _log_power_series(R *res, int resn, const R *p, int pn, const R *natural_numbers, const R *inverse_numbers);
		static void _exp_power_series(R *res, int resn, const R *p, int pn, const R *natural_numbers, const R *inverse_numbers);
	};
	int Polynomial::MULTIPRIME_FFT_THRESHOLD = 8;
	Polynomial::NumberTable Polynomial::_numberTable;

	void Polynomial::_fill_zero(R *res, int n) {
		for (int i = 0; i < n; ++ i)
			res[i] = ZeroR();
	}

	void Polynomial::_copy(R *res, const R *p, int n) {
		for (int i = 0; i < n; ++ i)
			res[i] = p[i];
	}

	void Polynomial::_negate(R *res, const R *p, int n) {
		for (int i = 0; i < n; ++ i)
			res[i] = -p[i];
	}

	void Polynomial::_add(R *res, const R *p, int pn, const R *q, int qn) {
		for (int i = 0; i < qn; ++ i)
			res[i] = p[i] + q[i];
		_copy(res + qn, p + qn, pn - qn);
	}

	void Polynomial::_subtract(R *res, const R *p, int pn, const R *q, int qn) {
		for (int i = 0; i < qn; ++ i)
			res[i] = p[i] - q[i];
		_copy(res + qn, p + qn, pn - qn);
	}

	void Polynomial::_add(R *p, const R *q, int n) {
		_add(p, p, n, q, n);
	}

	void Polynomial::_subtract(R *p, const R *q, int n) {
		_subtract(p, p, n, q, n);
	}

	void Polynomial::_multiply_1(R *res, const R *p, int n, R c0) {
		for (int i = 0; i < n; ++ i)
			res[i] = p[i] * c0;
	}

	void Polynomial::_multiply_1(R *p, int n, R c0) {
		_multiply_1(p, p, n, c0);
	}

	void Polynomial::_multiply_power_of_two(R *res, const R *p, int n, int k) {
		assert(0 < k && k < 31);
		R mul = R(1 << k);
		_multiply_1(res, p, n, mul);
	}

	void Polynomial::_divide_power_of_two(R *res, const R *p, int n, int k) {
		assert(0 < k && k < 31);
		static const R Inv2 = R(2).inverse();
		R inv = k == 1 ? Inv2 : R(1 << k).inverse();
		_multiply_1(res, p, n, inv);
	}


	void Polynomial::_multiply_select_method(R *res, const R *p, int pn, const R *q, int qn) {
		if (pn < qn) std::swap(p, q), std::swap(pn, qn);
		assert(res != p && res != q && pn >= qn && qn > 0);
		int rn = pn + qn - 1;
		if (qn == 1) {
			_multiply_1(res, p, pn, q[0]);
		} else if (qn < MULTIPRIME_FFT_THRESHOLD) {
			_schoolbook_multiplication(res, p, pn, q, qn);
		} else {
			_multiprime_fft(res, p, pn, q, qn);
		}
	}

	void Polynomial::_square_select_method(R *res, const R *p, int pn) {
		_multiply_select_method(res, p, pn, p, pn);
	}

	void Polynomial::_schoolbook_multiplication(R *res, const R *p, int pn, const R *q, int qn) {
		if (qn == 1) {
			_multiply_1(res, p, pn, q[0]);
			return;
		}
		assert(res != p && res != q && pn >= qn && qn > 0);

		_fill_zero(res, pn + qn - 1);
		for (int i = 0; i < pn; ++ i)
			for (int j = 0; j < qn; ++ j)
				res[i + j] += p[i] * q[j];
	}

	void Polynomial::_multiprime_fft(R *res, const R *p, int pn, const R *q, int qn) {
		int resn = pn + qn - 1;

		int n = 0;
		while ((1 << n) < resn) ++ n;

		if (p == q) {
			assert(pn == qn);
			fft::multiprime_auto_convolute(reinterpret_cast<R::ModNum*>(res), resn, reinterpret_cast<const R::ModNum*>(p), pn, n, mint::get_mod_info());
		} else {
			fft::multiprime_convolute(reinterpret_cast<R::ModNum*>(res), resn, reinterpret_cast<const R::ModNum*>(p), pn, reinterpret_cast<const R::ModNum*>(q), qn, n, mint::get_mod_info());
		}
	}

	void Polynomial::_reverse(R *res, const R *p, int pn) {
		if (res == p) {
			std::reverse(res, res + pn);
		} else {
			for (int i = 0; i < pn; ++ i)
				res[pn - 1 - i] = p[i];
		}
	}

	void Polynomial::_inverse_power_series(R *res, int resn, const R *p, int pn) {
		if (resn == 0) return;
		assert(res != p);
		assert(pn > 0);
		assert(!IsZeroR(p[0]));
		if (p[0] != OneR()) {
			unique_ptr<R[]> tmpp(new R[pn]);
			R ic0 = p[0].inverse();
			_multiply_1(tmpp.get(), p, pn, ic0);
			_inverse_power_series(res, resn, tmpp.get(), pn);
			_multiply_1(res, resn, ic0);
			return;
		}
		unique_ptr<R[]> ws(new R[resn * 4]);
		R *tmp1 = ws.get(), *tmp2 = tmp1 + resn * 2;
		_fill_zero(res, resn);
		res[0] = p[0];
		int curn = 1;
		while (curn < resn) {
			int nextn = std::min(resn, curn * 2);
			_square_select_method(tmp1, res, curn);
			_multiply_select_method(tmp2, tmp1, std::min(nextn, curn * 2 - 1), p, std::min(nextn, pn));
			_multiply_power_of_two(res, res, curn, 1);
			_subtract(res, tmp2, nextn);
			curn = nextn;
		}
	}

	void Polynomial::_precompute_reverse_inverse(R *res, int resn, const R *p, int pn) {
		unique_ptr<R[]> ws(new R[pn]);
		R *tmp = ws.get();
		_reverse(tmp, p, pn);
		_inverse_power_series(res, resn, tmp, pn);
	}

	void Polynomial::_divide_precomputed_inverse(R *res, int resn, const R *revp, int pn, const R *inv) {
		unique_ptr<R[]> ws(new R[pn + resn]);
		R *tmp = ws.get();
		_multiply_select_method(tmp, revp, pn, inv, resn);
		_reverse(res, tmp, resn);
	}

	void Polynomial::_divide_remainder_precomputed_inverse(R *quot, R *rem, const R *p, int pn, const R *q, int qn, const R *inv) {
		if (pn < qn) {
			_copy(rem, p, pn);
			_fill_zero(rem + pn, qn - 1 - pn);
			return;
		}
		assert(qn > 0);
		assert(q[qn - 1] == OneR());
		if (qn == 1) return;
		int quotn = pn - qn + 1;
		int rn = qn - 1, tn = std::min(quotn, rn), un = tn + rn;
		unique_ptr<R[]> ws(new R[pn + un + (quot != NULL ? 0 : quotn)]);
		R *revp = ws.get(), *quotmul = revp + pn;
		if (quot == NULL) quot = quotmul + un;
		_reverse(revp, p, pn);
		_divide_precomputed_inverse(quot, quotn, revp, pn, inv);
		_multiply_select_method(quotmul, q, rn, quot, tn);
		_subtract(rem, p, rn, quotmul, rn);
	}

	void Polynomial::_derivative(R *res, const R *p, int pn, const R *natural_numbers) {
		for (int i = 1; i < pn; ++ i)
			res[i - 1] = p[i] * natural_numbers[i];
	}

	void Polynomial::_integral(R *res, const R *p, int pn, const R *inverse_numbers) {
		for (int i = pn - 1; i >= 0; -- i)
			res[i + 1] = p[i] * inverse_numbers[i + 1];
		res[0] = ZeroR();
	}

	void Polynomial::_log_power_series(R *res, int resn, const R *p, int pn, const R *natural_numbers, const R *inverse_numbers) {
		assert(pn > 0);
		assert(p[0] == OneR());
		if (pn == 1 || resn <= 1) {
			_fill_zero(res, resn);
			return;
		}
		unique_ptr<R[]> ws(new R[pn * 2 + resn * 2]);
		R *tmp1 = ws.get(), *tmp2 = tmp1 + pn, *tmp3 = tmp2 + resn;
		_derivative(tmp1, p, pn, natural_numbers);
		_inverse_power_series(tmp2, resn - 1, p, pn);
		_multiply_select_method(tmp3, tmp1, pn - 1, tmp2, resn - 1);
		_integral(res, tmp3, resn - 1, inverse_numbers);
	}

	void Polynomial::_exp_power_series(R *res, int resn, const R *p, int pn, const R *natural_numbers, const R *inverse_numbers) {
		if (resn == 0) return;
		_fill_zero(res, resn);
		if (pn == 0) {
			res[0] = OneR();
			return;
		}
		assert(IsZeroR(p[0]));
		res[0] = OneR();
		unique_ptr<R[]> ws(new R[resn * 3]);
		R *tmp1 = ws.get(), *tmp2 = tmp1 + resn;
		int curn = 1;
		while (curn < resn) {
			int nextn = std::min(resn, curn * 2);
			_log_power_series(tmp1, nextn, res, curn, natural_numbers, inverse_numbers);
			_subtract(tmp1, tmp1, nextn, p, std::min(nextn, pn));
			tmp1[0] -= OneR();
			_multiply_select_method(tmp2, tmp1, nextn, res, curn);
			_negate(res, tmp2, nextn);
			curn = nextn;
		}
	}
}

mint operator^(mint a, unsigned long long k) {
	mint r = 1;
	while (k) {
		if (k & 1) r *= a;
		a *= a;
		k >>= 1;
	}
	return r;
}

vector<mint> fact, factinv;
void nCr_computeFactinv(int N) {
	fact.resize(N + 1); factinv.resize(N + 1);
	fact[0] = 1;
	rer(i, 1, N) fact[i] = fact[i - 1] * i;
	factinv[N] = fact[N].inverse();
	for (int i = N; i >= 1; i --) factinv[i - 1] = factinv[i] * i;
}

int main() {
	mint::set_mod(663224321);
	using mod_polynomial::Polynomial;
	const int N = 200000;
	nCr_computeFactinv(N);
	Polynomial P;
	P.resize(N + 1);
	rer(n, 0, N)
		P.set(n, mint(2) ^ ((ll)n * (n - 1) / 2));
	rer(n, 0, N)
		P.set(n, P.get(n) * factinv[n]);
	Polynomial L = P.logarithm(N);
	rer(n, 0, N)
		L.set(n, L.get(n) * fact[n]);
	int T;
	scanf("%d", &T);
	for (int ii = 0; ii < T; ++ ii) {
		int n;
		scanf("%d", &n);
		mint ans = L.get(n);
		printf("%d\n", (int)ans.get());
	}
	return 0;
}

Counting Road Networks Java Solution


import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.InputMismatchException;

public class E2 {
	InputStream is;
	PrintWriter out;
	String INPUT = "";
	
	void solve()
	{
		int n = 100005;
		long[] a = new long[n];
		int mod = 663224321;
		for(int i = 0;i < n;i++){
			a[i] = pow(2, (long)i*(i-1)/2, mod);
		}
		int[][] fif = enumFIF(100005, mod);
		long[] ta = transformLogarithmically(a, fif);
		for(int Q = ni();Q > 0;Q--){
			out.println(ta[ni()]);
		}
	}
	
	public static int[][] enumFIF(int n, int mod) {
		int[] f = new int[n + 1];
		int[] invf = new int[n + 1];
		f[0] = 1;
		for (int i = 1; i <= n; i++) {
			f[i] = (int) ((long) f[i - 1] * i % mod);
		}
		long a = f[n];
		long b = mod;
		long p = 1, q = 0;
		while (b > 0) {
			long c = a / b;
			long d;
			d = a;
			a = b;
			b = d % b;
			d = p;
			p = q;
			q = d - c * q;
		}
		invf[n] = (int) (p < 0 ? p + mod : p);
		for (int i = n - 1; i >= 0; i--) {
			invf[i] = (int) ((long) invf[i + 1] * (i + 1) % mod);
		}
		return new int[][] { f, invf };
	}

	
	public static long pow(long a, long n, long mod) {
		//		a %= mod;
		long ret = 1;
		int x = 63 - Long.numberOfLeadingZeros(n);
		for (; x >= 0; x--) {
			ret = ret * ret % mod;
			if (n << 63 - x < 0)
				ret = ret * a % mod;
		}
		return ret;
	}
	
	public static int mod = 663224321;
	public static int G = 3;
	
	public static long[] mul(long[] a, long[] b)
	{
		return Arrays.copyOf(convoluteSimply(a, b, mod, G), a.length+b.length-1);
	}
	
	public static long[] mul(long[] a, long[] b, int lim)
	{
		return Arrays.copyOf(convoluteSimply(a, b, mod, G), lim);
	}
	
	public static long[] mulnaive(long[] a, long[] b)
	{
		long[] c = new long[a.length+b.length-1];
		long big = 8L*mod*mod;
		for(int i = 0;i < a.length;i++){
			for(int j = 0;j < b.length;j++){
				c[i+j] += a[i]*b[j];
				if(c[i+j] >= big)c[i+j] -= big;
			}
		}
		for(int i = 0;i < c.length;i++)c[i] %= mod;
		return c;
	}
	
	public static long[] mulnaive(long[] a, long[] b, int lim)
	{
		long[] c = new long[lim];
		long big = 8L*mod*mod;
		for(int i = 0;i < a.length;i++){
			for(int j = 0;j < b.length && i+j < lim;j++){
				c[i+j] += a[i]*b[j];
				if(c[i+j] >= big)c[i+j] -= big;
			}
		}
		for(int i = 0;i < c.length;i++)c[i] %= mod;
		return c;
	}
	
	public static long[] add(long[] a, long[] b)
	{
		long[] c = new long[Math.max(a.length, b.length)];
		for(int i = 0;i < a.length;i++)c[i] += a[i];
		for(int i = 0;i < b.length;i++)c[i] += b[i];
		for(int i = 0;i < c.length;i++)if(c[i] >= mod)c[i] -= mod;
		return c;
	}
	
	public static long[] add(long[] a, long[] b, int lim)
	{
		long[] c = new long[lim];
		for(int i = 0;i < a.length && i < lim;i++)c[i] += a[i];
		for(int i = 0;i < b.length && i < lim;i++)c[i] += b[i];
		for(int i = 0;i < c.length;i++)if(c[i] >= mod)c[i] -= mod;
		return c;
	}
	
	public static long[] sub(long[] a, long[] b)
	{
		long[] c = new long[Math.max(a.length, b.length)];
		for(int i = 0;i < a.length;i++)c[i] += a[i];
		for(int i = 0;i < b.length;i++)c[i] -= b[i];
		for(int i = 0;i < c.length;i++)if(c[i] < 0)c[i] += mod;
		return c;
	}
	
	public static long[] sub(long[] a, long[] b, int lim)
	{
		long[] c = new long[lim];
		for(int i = 0;i < a.length && i < lim;i++)c[i] += a[i];
		for(int i = 0;i < b.length && i < lim;i++)c[i] -= b[i];
		for(int i = 0;i < c.length;i++)if(c[i] < 0)c[i] += mod;
		return c;
	}
	
	// F_{t+1}(x) = -F_t(x)^2*P(x) + 2F_t(x)
	// if want p-destructive, comment out flipping p just before returning.
	public static long[] inv(long[] p)
	{
		int n = p.length;
		long[] f = {invl(p[0], mod)};
		for(int i = 0;i < p.length;i++){
			if(p[i] == 0)continue;
			p[i] = mod-p[i];
		}
		for(int i = 1;i < 2*n;i*=2){
			long[] f2 = mul(f, f, Math.min(n, 2*i));
			long[] f2p = mul(f2, Arrays.copyOf(p, i), Math.min(n, 2*i));
			for(int j = 0;j < f.length;j++){
				f2p[j] += 2L*f[j];
				if(f2p[j] >= mod)f2p[j] -= mod;
				if(f2p[j] >= mod)f2p[j] -= mod;
			}
			f = f2p;
		}
		for(int i = 0;i < p.length;i++){
			if(p[i] == 0)continue;
			p[i] = mod-p[i];
		}
		return f;
	}
	
	// differentiate	
	public static long[] d(long[] p)
	{
		long[] q = new long[p.length];
		for(int i = 0;i < p.length-1;i++){
			q[i] = p[i+1] * (i+1) % mod;
		}
		return q;
	}
	
	// integrate
	public static long[] i(long[] p)
	{
		long[] q = new long[p.length];
		for(int i = 0;i < p.length-1;i++){
			q[i+1] = p[i] * invl(i+1, mod) % mod;
		}
		return q;
	}
	
	// F_{t+1}(x) = F_t(x)-(ln F_t(x) - P(x)) * F_t(x)
	public static long[] exp(long[] p)
	{
		int n = p.length;
		long[] f = {p[0]};
		for(int i = 1;i < 2*n;i*=2){
			long[] ii = ln(f);
			long[] sub = sub(ii, p, Math.min(n, 2*i));
			if(--sub[0] < 0)sub[0] += mod;
			for(int j = 0;j < 2*i && j < n;j++){
				sub[j] = mod-sub[j];
				if(sub[j] == mod)sub[j] = 0;
			}
			f = mul(sub, f, Math.min(n, 2*i));
//			f = sub(f, mul(sub(ii, p, 2*i), f, 2*i));
		}
		return f;
	}
	
	// \int f'(x)/f(x) dx
	public static long[] ln(long[] f)
	{
		long[] ret = i(mul(d(f), inv(f)));
		ret[0] = f[0];
		return ret;
	}
	
	// ln F(x) - k ln P(x) = 0
	public static long[] pow(long[] p, int K)
	{
		int n = p.length;
		long[] lnp = ln(p);
		for(int i = 1;i < lnp.length;i++)lnp[i] = lnp[i] * K % mod;
		lnp[0] = pow(p[0], K, mod); // go well for some reason
		return exp(Arrays.copyOf(lnp, n));
	}
	
	// destructive
	public static long[] divf(long[] a, int[][] fif)
	{
		for(int i = 0;i < a.length;i++)a[i] = a[i] * fif[1][i] % mod;
		return a;
	}
	
	// destructive
	public static long[] mulf(long[] a, int[][] fif)
	{
		for(int i = 0;i < a.length;i++)a[i] = a[i] * fif[0][i] % mod;
		return a;
	}
	
	public static long[] transformExponentially(long[] a, int[][] fif)
	{
		return mulf(exp(divf(Arrays.copyOf(a, a.length), fif)), fif);
	}
	
	public static long[] transformLogarithmically(long[] a, int[][] fif)
	{
		return mulf(Arrays.copyOf(ln(divf(Arrays.copyOf(a, a.length), fif)), a.length), fif);
	}
	
	public static long invl(long a, long mod) {
		long b = mod;
		long p = 1, q = 0;
		while (b > 0) {
			long c = a / b;
			long d;
			d = a;
			a = b;
			b = d % b;
			d = p;
			p = q;
			q = d - c * q;
		}
		return p < 0 ? p + mod : p;
	}
	
	public static long[] reverse(long[] p)
	{
		long[] ret = new long[p.length];
		for(int i = 0;i < p.length;i++){
			ret[i] = p[p.length-1-i];
		}
		return ret;
	}
	
	public static long[] reverse(long[] p, int lim)
	{
		long[] ret = new long[lim];
		for(int i = 0;i < lim && i < p.length;i++){
			ret[i] = p[p.length-1-i];
		}
		return ret;
	}
	
	// [quotient, remainder]
	// remainder can be empty.
	// @see http://www.dis.uniroma1.it/~sankowski/lecture4.pdf
	public static long[][] div(long[] p, long[] q)
	{
		if(p.length < q.length)return new long[][]{new long[0], Arrays.copyOf(p, p.length)};
		long[] rp = reverse(p, p.length-q.length+1);
		long[] rq = reverse(q, p.length-q.length+1);
		long[] rd = mul(rp, inv(rq), p.length-q.length+1);
		long[] d = reverse(rd, p.length-q.length+1);
		long[] r = sub(p, mul(d, q, q.length-1), q.length-1);
		return new long[][]{d, r};
	}

	public static long[] substitute(long[] p, long[] xs)
	{
		return descendProductTree(p, buildProductTree(xs));
	}
	
	public static long[][] buildProductTree(long[] xs)
	{
		int m = Integer.highestOneBit(xs.length)*4;
		long[][] ms = new long[m][];
		for(int i = 0;i < xs.length;i++){
			ms[m/2+i] = new long[]{mod-xs[i], 1};
		}
		for(int i = m/2-1;i >= 1;i--){
			if(ms[2*i] == null){
				ms[i] = null;
			}else if(ms[2*i+1] == null){
				ms[i] = ms[2*i];
			}else{
				ms[i] = mul(ms[2*i], ms[2*i+1]);
			}
		}
		return ms;
	}
	
	public static long[] descendProductTree(long[] p, long[][] pt)
	{
		long[] rets = new long[pt[1].length-1];
		dfs(p, pt, 1, rets);
		return rets;
	}
	
	private static void dfs(long[] p, long[][] pt, int cur, long[] rets)
	{
		if(pt[cur] == null)return;
		if(cur >= pt.length/2){
			rets[cur-pt.length/2] = p[0];
		}else{
			// F = q1X+r1
			// F = q2Y+r2
			
			if(p.length >= 1500){
				if(pt[2*cur+1] != null){
					long[][] qr0 = div(p, pt[2*cur]);
					dfs(qr0[1], pt, cur*2, rets);
					long[][] qr1 = div(p, pt[2*cur+1]);
					dfs(qr1[1], pt, cur*2+1, rets);
				}else if(pt[2*cur] != null){
					long[] nex = cur == 1 ? div(p, pt[2*cur])[1] : p;
					dfs(nex, pt, cur*2, rets);
				}
			}else{
				if(pt[2*cur+1] != null){
					dfs(modnaive(p, pt[2*cur]), pt, cur*2, rets);
					dfs(modnaive(p, pt[2*cur+1]), pt, cur*2+1, rets);
				}else if(pt[2*cur] != null){
					long[] nex = cur == 1 ? modnaive(p, pt[2*cur]) : p;
					dfs(nex, pt, cur*2, rets);
				}
			}
		}
	}
	
	
	public static long[][] divnaive(long[] a, long[] b)
	{
		int n = a.length, m = b.length;
		if(n-m+1 <= 0)return new long[][]{new long[0], Arrays.copyOf(a, n)};
		long[] r = Arrays.copyOf(a, n);
		long[] q = new long[n-m+1];
		long ib = invl(b[m-1], mod);
		for(int i = n-1;i >= m-1;i--){
			long x = ib * r[i] % mod;
			q[i-(m-1)] = x;
			for(int j = m-1;j >= 0;j--){
				r[i+j-(m-1)] -= b[j]*x;
				r[i+j-(m-1)] %= mod;
				if(r[i+j-(m-1)] < 0)r[i+j-(m-1)] += mod;
//				r[i+j-(m-1)] = modh(r[i+j-(m-1)]+(long)mod*mod - b[j]*x, MM, HH, mod);
			}
		}
		return new long[][]{q, Arrays.copyOf(r, m-1)};
	}
	
	public static long[] modnaive(long[] a, long[] b)
	{
		int n = a.length, m = b.length;
		if(n-m+1 <= 0)return a;
		long[] r = Arrays.copyOf(a, n);
		long ib = invl(b[m-1], mod);
		for(int i = n-1;i >= m-1;i--){
			long x = ib * r[i] % mod;
			for(int j = m-1;j >= 0;j--){
				r[i+j-(m-1)] -= b[j]*x;
				r[i+j-(m-1)] %= mod;
				if(r[i+j-(m-1)] < 0)r[i+j-(m-1)] += mod;
//				r[i+j-(m-1)] = modh(r[i+j-(m-1)]+(long)mod*mod - b[j]*x, MM, HH, mod);
			}
		}
		return Arrays.copyOf(r, m-1);
	}

	public static final int[] NTTPrimes = {1053818881, 1051721729, 1045430273, 1012924417, 1007681537, 1004535809, 998244353, 985661441, 976224257, 975175681};
	public static final int[] NTTPrimitiveRoots = {7, 6, 3, 5, 3, 3, 3, 3, 3, 17};
//	public static final int[] NTTPrimes = {1012924417, 1004535809, 998244353, 985661441, 975175681, 962592769, 950009857, 943718401, 935329793, 924844033};
//	public static final int[] NTTPrimitiveRoots = {5, 3, 3, 3, 17, 7, 7, 7, 3, 5};
	
	public static long[] convoluteSimply(long[] a, long[] b, int P, int g)
	{
		int m = Math.max(2, Integer.highestOneBit(Math.max(a.length, b.length)-1)<<2);
		long[] fa = nttmb(a, m, false, P, g);
		long[] fb = a == b ? fa : nttmb(b, m, false, P, g);
		for(int i = 0;i < m;i++){
			fa[i] = fa[i]*fb[i]%P;
		}
		return nttmb(fa, m, true, P, g);
	}
	
	public static long[] convolute(long[] a, long[] b)
	{
		int USE = 2;
		int m = Math.max(2, Integer.highestOneBit(Math.max(a.length, b.length)-1)<<2);
		long[][] fs = new long[USE][];
		for(int k = 0;k < USE;k++){
			int P = NTTPrimes[k], g = NTTPrimitiveRoots[k];
			long[] fa = nttmb(a, m, false, P, g);
			long[] fb = a == b ? fa : nttmb(b, m, false, P, g);
			for(int i = 0;i < m;i++){
				fa[i] = fa[i]*fb[i]%P;
			}
			fs[k] = nttmb(fa, m, true, P, g);
		}
		
		int[] mods = Arrays.copyOf(NTTPrimes, USE);
		long[] gammas = garnerPrepare(mods);
		int[] buf = new int[USE];
		for(int i = 0;i < fs[0].length;i++){
			for(int j = 0;j < USE;j++)buf[j] = (int)fs[j][i];
			long[] res = garnerBatch(buf, mods, gammas);
			long ret = 0;
			for(int j = res.length-1;j >= 0;j--)ret = ret * mods[j] + res[j];
			fs[0][i] = ret;
		}
		return fs[0];
	}
	
	public static long[] convolute(long[] a, long[] b, int USE, int mod)
	{
		int m = Math.max(2, Integer.highestOneBit(Math.max(a.length, b.length)-1)<<2);
		long[][] fs = new long[USE][];
		for(int k = 0;k < USE;k++){
			int P = NTTPrimes[k], g = NTTPrimitiveRoots[k];
			long[] fa = nttmb(a, m, false, P, g);
			long[] fb = a == b ? fa : nttmb(b, m, false, P, g);
			for(int i = 0;i < m;i++){
				fa[i] = fa[i]*fb[i]%P;
			}
			fs[k] = nttmb(fa, m, true, P, g);
		}
		
		int[] mods = Arrays.copyOf(NTTPrimes, USE);
		long[] gammas = garnerPrepare(mods);
		int[] buf = new int[USE];
		for(int i = 0;i < fs[0].length;i++){
			for(int j = 0;j < USE;j++)buf[j] = (int)fs[j][i];
			long[] res = garnerBatch(buf, mods, gammas);
			long ret = 0;
			for(int j = res.length-1;j >= 0;j--)ret = (ret * mods[j] + res[j]) % mod;
			fs[0][i] = ret;
		}
		return fs[0];
	}
	
	// static int[] wws = new int[270000]; // outer faster
	
	// Modifed Montgomery + Barrett
	private static long[] nttmb(long[] src, int n, boolean inverse, int P, int g)
	{
		long[] dst = Arrays.copyOf(src, n);
		
		int h = Integer.numberOfTrailingZeros(n);
		long K = Integer.highestOneBit(P)<<1;
		int H = Long.numberOfTrailingZeros(K)*2;
		long M = K*K/P;
		
		int[] wws = new int[1<<h-1];
		long dw = inverse ? pow(g, P-1-(P-1)/n, P) : pow(g, (P-1)/n, P);
		long w = (1L<<32)%P;
		for(int k = 0;k < 1<<h-1;k++){
			wws[k] = (int)w;
			w = modh(w*dw, M, H, P);
		}
		long J = invl(P, 1L<<32);
		for(int i = 0;i < h;i++){
			for(int j = 0;j < 1<<i;j++){
				for(int k = 0, s = j<<h-i, t = s|1<<h-i-1;k < 1<<h-i-1;k++,s++,t++){
					long u = (dst[s] - dst[t] + 2*P)*wws[k];
					dst[s] += dst[t];
					if(dst[s] >= 2*P)dst[s] -= 2*P;
//					long Q = (u&(1L<<32)-1)*J&(1L<<32)-1;
					long Q = (u<<32)*J>>>32;
					dst[t] = (u>>>32)-(Q*P>>>32)+P;
				}
			}
			if(i < h-1){
				for(int k = 0;k < 1<<h-i-2;k++)wws[k] = wws[k*2];
			}
		}
		for(int i = 0;i < n;i++){
			if(dst[i] >= P)dst[i] -= P;
		}
		for(int i = 0;i < n;i++){
			int rev = Integer.reverse(i)>>>-h;
			if(i < rev){
				long d = dst[i]; dst[i] = dst[rev]; dst[rev] = d;
			}
		}
		
		if(inverse){
			long in = invl(n, P);
			for(int i = 0;i < n;i++)dst[i] = modh(dst[i]*in, M, H, P);
		}
		
		return dst;
	}
	
	static final long mask = (1L<<31)-1;
	
	public static long modh(long a, long M, int h, int mod)
	{
		long r = a-((M*(a&mask)>>>31)+M*(a>>>31)>>>h-31)*mod;
		return r < mod ? r : r-mod;
	}
	
	private static long[] garnerPrepare(int[] m)
	{
		int n = m.length;
		assert n == m.length;
		if(n == 0)return new long[0];
		long[] gamma = new long[n];
		for(int k = 1;k < n;k++){
			long prod = 1;
			for(int i = 0;i < k;i++){
				prod = prod * m[i] % m[k];
			}
			gamma[k] = invl(prod, m[k]);
		}
		return gamma;
	}
	
	private static long[] garnerBatch(int[] u, int[] m, long[] gamma)
	{
		int n = u.length;
		assert n == m.length;
		long[] v = new long[n];
		v[0] = u[0];
		for(int k = 1;k < n;k++){
			long temp = v[k-1];
			for(int j = k-2;j >= 0;j--){
				temp = (temp * m[j] + v[j]) % m[k];
			}
			v[k] = (u[k] - temp) * gamma[k] % m[k];
			if(v[k] < 0)v[k] += m[k];
		}
		return v;
	}
	
	void run() throws Exception
	{
		is = INPUT.isEmpty() ? System.in : new ByteArrayInputStream(INPUT.getBytes());
		out = new PrintWriter(System.out);
		
		long s = System.currentTimeMillis();
		solve();
		out.flush();
		if(!INPUT.isEmpty())tr(System.currentTimeMillis()-s+"ms");
	}
	
	public static void main(String[] args) throws Exception { new E2().run(); }
	
	private byte[] inbuf = new byte[1024];
	public int lenbuf = 0, ptrbuf = 0;
	
	private int readByte()
	{
		if(lenbuf == -1)throw new InputMismatchException();
		if(ptrbuf >= lenbuf){
			ptrbuf = 0;
			try { lenbuf = is.read(inbuf); } catch (IOException e) { throw new InputMismatchException(); }
			if(lenbuf <= 0)return -1;
		}
		return inbuf[ptrbuf++];
	}
	
	private boolean isSpaceChar(int c) { return !(c >= 33 && c <= 126); }
	private int skip() { int b; while((b = readByte()) != -1 && isSpaceChar(b)); return b; }
	
	private double nd() { return Double.parseDouble(ns()); }
	private char nc() { return (char)skip(); }
	
	private String ns()
	{
		int b = skip();
		StringBuilder sb = new StringBuilder();
		while(!(isSpaceChar(b))){ // when nextLine, (isSpaceChar(b) && b != ' ')
			sb.appendCodePoint(b);
			b = readByte();
		}
		return sb.toString();
	}
	
	private char[] ns(int n)
	{
		char[] buf = new char[n];
		int b = skip(), p = 0;
		while(p < n && !(isSpaceChar(b))){
			buf[p++] = (char)b;
			b = readByte();
		}
		return n == p ? buf : Arrays.copyOf(buf, p);
	}
	
	private char[][] nm(int n, int m)
	{
		char[][] map = new char[n][];
		for(int i = 0;i < n;i++)map[i] = ns(m);
		return map;
	}
	
	private int[] na(int n)
	{
		int[] a = new int[n];
		for(int i = 0;i < n;i++)a[i] = ni();
		return a;
	}
	
	private int ni()
	{
		int num = 0, b;
		boolean minus = false;
		while((b = readByte()) != -1 && !((b >= '0' && b <= '9') || b == '-'));
		if(b == '-'){
			minus = true;
			b = readByte();
		}
		
		while(true){
			if(b >= '0' && b <= '9'){
				num = num * 10 + (b - '0');
			}else{
				return minus ? -num : num;
			}
			b = readByte();
		}
	}
	
	private long nl()
	{
		long num = 0;
		int b;
		boolean minus = false;
		while((b = readByte()) != -1 && !((b >= '0' && b <= '9') || b == '-'));
		if(b == '-'){
			minus = true;
			b = readByte();
		}
		
		while(true){
			if(b >= '0' && b <= '9'){
				num = num * 10 + (b - '0');
			}else{
				return minus ? -num : num;
			}
			b = readByte();
		}
	}
	
	private static void tr(Object... o) { System.out.println(Arrays.deepToString(o)); }
}