Hello Codeforcers! I need your help to debug this strange TLE verdict. I was trying to implement the editorial of the problem [D. Chip Moves](https://codeforces.me/contest/1716/problem/D). My implementation which is very similar to the editorial is this: ↵
↵
↵
```c++↵
using ll = long long;↵
#define rep(i, n) for (ll i=0; i<n; ++i)↵
#define rep1(i, n) for (ll i=1; i<=n; ++i)↵
↵
const ll MAXN = 2e5 + 5;↵
ll A[MAXN][4];↵
↵
void solve() {↵
constexpr ll mod = 998244353;↵
↵
memset(A, 0, sizeof A); ↵
↵
ll n; cin >> n;↵
ll k; cin >> k;↵
↵
ll tot = 0;↵
ll step = k;↵
↵
ll CUR = 0;↵
ll DP = 1;↵
ll SAME = 2;↵
ll ANS = 3;↵
↵
A[0][DP] = 1; ↵
↵
ll iters = 0;↵
while ((tot += step) <= n) {↵
iters++;↵
↵
rep (i, n+1) A[i][CUR] = 0;↵
↵
rep (i, step) A[i][SAME] = 0;↵
↵
for (ll i = max<ll>(step-4, 0); i <= n; ++i) { ↵
(A[i][CUR] += A[i % step][SAME]) %= mod;↵
↵
// This line seems to cause the TLE↵
(A[i][ANS] += A[i][CUR]) %= mod; ↵
↵
(A[i % step][SAME] += A[i][DP]) %= mod;↵
} ↵
↵
swap(CUR, DP);↵
step++;↵
}↵
↵
for (ll i = 1; i <= n; ++i) {↵
cout << A[i][ANS] << " ";↵
}↵
↵
cout << endl;↵
↵
// cerr << "iters: " << iters << endl; ↵
↵
}↵
```↵
↵
↵
which received a TLE verdict. Upon testing this locally, it seems to work pretty fast uptill n = 170000, but when putting n = 180000, the program suddenly stops running and hangs. Upon further investigation inside the solve function, this one line `(A[i][ANS] += A[i][CUR]) %= mod;` seems to be the issue! Just uncommenting this one line gives the solution extremely fast (although obviously wrong)! However uncommenting this line out causes the program to TLE. ↵
↵
<spoiler summary="Full code">↵
↵
```c++↵
#pragma GCC optimize("O3")↵
#pragma GCC optimize ("unroll-loops")↵
//#pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx")↵
↵
#include <bits/stdc++.h>↵
#include <ext/pb_ds/assoc_container.hpp> ↵
#include <ext/pb_ds/tree_policy.hpp> ↵
using namespace std;↵
using namespace __gnu_pbds;↵
↵
/* --- <SNIPPET> --- */↵
↵
/* --- </SNIPPET> --- */↵
↵
//#define MOD ((ll)(1e9 + 7))↵
#define MOD 998244353↵
using ll = long long;↵
using ull = unsigned long long;↵
typedef vector<ll> vll;↵
typedef vector<int> vi;↵
typedef vector<ull> vull;↵
typedef pair<ll, ll> pll;↵
↵
↵
/* --- some std helpers --- */↵
struct custom_hash {↵
static uint64_t splitmix64(uint64_t x) {↵
// http://xorshift.di.unimi.it/splitmix64.c↵
x += 0x9e3779b97f4a7c15;↵
x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9;↵
x = (x ^ (x >> 27)) * 0x94d049bb133111eb;↵
return x ^ (x >> 31);↵
}↵
↵
size_t operator()(uint64_t x) const {↵
static const uint64_t FIXED_RANDOM = chrono::steady_clock::now().time_since_epoch().count();↵
return splitmix64(x + FIXED_RANDOM);↵
}↵
};↵
↵
template <typename X, typename Y>↵
std::ostream& operator << ( std::ostream & os, pair<X, Y> const &p)↵
{ ↵
return os << '[' << p.first << ',' << p.second << ']'; ↵
}↵
/* ------ */↵
↵
↵
template<class T, class U> inline bool chmin(T& a, const U& b) { if (a > b) { a = b; return true; } return false; }↵
template<class T, class U> inline bool chmax(T& a, const U& b) { if (a < b) { a = b; return true; } return false; }↵
↵
↵
template <typename K, typename V>↵
using umap = gp_hash_table<K, V, custom_hash>;↵
↵
template <typename T>↵
using uset = umap<T, null_type>;↵
↵
template <typename T>↵
using oset = tree<T, null_type, less<T>, rb_tree_tag, tree_order_statistics_node_update>;↵
↵
#define vec vector↵
#define endl "\n"↵
#define pb push_back↵
#define ff first↵
#define ss second↵
#define all(x) x.begin(), x.end()↵
#define rep(i, n) for (ll i=0; i<n; ++i)↵
#define rep1(i, n) for (ll i=1; i<=n; ++i)↵
#define rev(i, n) for (ll i=n; i>=0; --i)↵
#define grid(type, name, m, n) vec<vec<type>> name(m, vec<type>(n))↵
#define gridv(type, name, m, n, v) vec<vec<type>> name(m, vec<type>(n, v))↵
#define grid3(type, name, p, q, r, val) vec<vec<vec<type>>> name(p, vec<vec<type>>(q, vec<type>(r, val)))↵
#define each(x, a) for (auto &x : a)↵
template <typename T> void print(T v) { cout << v << endl; }↵
template <typename T> void print(vector<T> v) { each(x, v) cout << x << " "; cout << endl; }↵
template <typename T> void print(vector<vector<T>> v) { each (r, v) print(r); }↵
↵
#ifdef LOCAL↵
#define dbg(...) fprintf(stderr, __VA_ARGS__)↵
#else↵
#define dbg(...)↵
#endif↵
↵
/* -- COMMON UTILITIES -- */↵
ll binpow(ll a, ll b, ll m) {↵
a %= m;↵
ll res = 1;↵
while (b > 0) {↵
if (b & 1)↵
res = res * a % m;↵
a = a * a % m;↵
b >>= 1;↵
}↵
return res;↵
}↵
↵
class PrefixSum {↵
vll psum;↵
public:↵
PrefixSum(vll &a) {↵
psum.resize(a.size());↵
partial_sum(all(a), psum.begin());↵
}↵
ll query(ll l, ll r) {↵
if (l > r) return 0;↵
return psum[r] - (l > 0 ? psum[l-1] : 0);↵
}↵
};↵
/* -- END -- */↵
↵
const ll MAXN = 2e5 + 5;↵
ll A[MAXN][4];↵
↵
void solve() {↵
constexpr ll mod = 998244353;↵
↵
memset(A, 0, sizeof A); ↵
↵
ll n; cin >> n;↵
ll k; cin >> k;↵
↵
ll tot = 0;↵
ll step = k;↵
↵
ll CUR = 0;↵
ll DP = 1;↵
ll SAME = 2;↵
ll ANS = 3;↵
↵
A[0][DP] = 1; ↵
↵
ll iters = 0;↵
while ((tot += step) <= n) {↵
iters++;↵
↵
rep (i, n+1) A[i][CUR] = 0;↵
↵
rep (i, step) A[i][SAME] = 0;↵
↵
for (ll i = max<ll>(step-4, 0); i <= n; ++i) { ↵
(A[i][CUR] += A[i % step][SAME]) %= mod;↵
↵
(A[i][ANS] += A[i][CUR]) %= mod; ↵
↵
(A[i % step][SAME] += A[i][DP]) %= mod;↵
} ↵
↵
swap(CUR, DP);↵
step++;↵
}↵
↵
for (ll i = 1; i <= n; ++i) {↵
cout << A[i][ANS] << " ";↵
}↵
↵
cout << endl;↵
↵
// cerr << "iters: " << iters << endl; ↵
↵
}↵
↵
signed main() {↵
#ifndef LOCAL↵
ios_base::sync_with_stdio(false); cin.tie(NULL); cout.tie(NULL);↵
#endif↵
ll t = 1;↵
/* cin >> t; */↵
↵
while (t--) {↵
solve();↵
}↵
↵
return 0;↵
}↵
↵
```↵
↵
</spoiler>↵
↵
What is about this one small line which is a simple O(1) operation which causes the whole program to simply hang? I've been stuck on this weird bug for a while and would like to seek your help.↵
↵
↵
```c++↵
using ll = long long;↵
#define rep(i, n) for (ll i=0; i<n; ++i)↵
#define rep1(i, n) for (ll i=1; i<=n; ++i)↵
↵
const ll MAXN = 2e5 + 5;↵
ll A[MAXN][4];↵
↵
void solve() {↵
constexpr ll mod = 998244353;↵
↵
memset(A, 0, sizeof A); ↵
↵
ll n; cin >> n;↵
ll k; cin >> k;↵
↵
ll tot = 0;↵
ll step = k;↵
↵
ll CUR = 0;↵
ll DP = 1;↵
ll SAME = 2;↵
ll ANS = 3;↵
↵
A[0][DP] = 1; ↵
↵
ll iters = 0;↵
while ((tot += step) <= n) {↵
iters++;↵
↵
rep (i, n+1) A[i][CUR] = 0;↵
↵
rep (i, step) A[i][SAME] = 0;↵
↵
for (ll i = max<ll>(step-4, 0); i <= n; ++i) { ↵
(A[i][CUR] += A[i % step][SAME]) %= mod;↵
↵
// This line seems to cause the TLE↵
(A[i][ANS] += A[i][CUR]) %= mod; ↵
↵
(A[i % step][SAME] += A[i][DP]) %= mod;↵
} ↵
↵
swap(CUR, DP);↵
step++;↵
}↵
↵
for (ll i = 1; i <= n; ++i) {↵
cout << A[i][ANS] << " ";↵
}↵
↵
cout << endl;↵
↵
// cerr << "iters: " << iters << endl; ↵
↵
}↵
```↵
↵
↵
which received a TLE verdict. Upon testing this locally, it seems to work pretty fast uptill n = 170000, but when putting n = 180000, the program suddenly stops running and hangs. Upon further investigation inside the solve function, this one line `(A[i][ANS] += A[i][CUR]) %= mod;` seems to be the issue! Just uncommenting this one line gives the solution extremely fast (although obviously wrong)! However uncommenting this line out causes the program to TLE. ↵
↵
<spoiler summary="Full code">↵
↵
```c++↵
#pragma GCC optimize("O3")↵
#pragma GCC optimize ("unroll-loops")↵
//#pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx")↵
↵
#include <bits/stdc++.h>↵
#include <ext/pb_ds/assoc_container.hpp> ↵
#include <ext/pb_ds/tree_policy.hpp> ↵
using namespace std;↵
using namespace __gnu_pbds;↵
↵
/* --- <SNIPPET> --- */↵
↵
/* --- </SNIPPET> --- */↵
↵
//#define MOD ((ll)(1e9 + 7))↵
#define MOD 998244353↵
using ll = long long;↵
using ull = unsigned long long;↵
typedef vector<ll> vll;↵
typedef vector<int> vi;↵
typedef vector<ull> vull;↵
typedef pair<ll, ll> pll;↵
↵
↵
/* --- some std helpers --- */↵
struct custom_hash {↵
static uint64_t splitmix64(uint64_t x) {↵
// http://xorshift.di.unimi.it/splitmix64.c↵
x += 0x9e3779b97f4a7c15;↵
x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9;↵
x = (x ^ (x >> 27)) * 0x94d049bb133111eb;↵
return x ^ (x >> 31);↵
}↵
↵
size_t operator()(uint64_t x) const {↵
static const uint64_t FIXED_RANDOM = chrono::steady_clock::now().time_since_epoch().count();↵
return splitmix64(x + FIXED_RANDOM);↵
}↵
};↵
↵
template <typename X, typename Y>↵
std::ostream& operator << ( std::ostream & os, pair<X, Y> const &p)↵
{ ↵
return os << '[' << p.first << ',' << p.second << ']'; ↵
}↵
/* ------ */↵
↵
↵
template<class T, class U> inline bool chmin(T& a, const U& b) { if (a > b) { a = b; return true; } return false; }↵
template<class T, class U> inline bool chmax(T& a, const U& b) { if (a < b) { a = b; return true; } return false; }↵
↵
↵
template <typename K, typename V>↵
using umap = gp_hash_table<K, V, custom_hash>;↵
↵
template <typename T>↵
using uset = umap<T, null_type>;↵
↵
template <typename T>↵
using oset = tree<T, null_type, less<T>, rb_tree_tag, tree_order_statistics_node_update>;↵
↵
#define vec vector↵
#define endl "\n"↵
#define pb push_back↵
#define ff first↵
#define ss second↵
#define all(x) x.begin(), x.end()↵
#define rep(i, n) for (ll i=0; i<n; ++i)↵
#define rep1(i, n) for (ll i=1; i<=n; ++i)↵
#define rev(i, n) for (ll i=n; i>=0; --i)↵
#define grid(type, name, m, n) vec<vec<type>> name(m, vec<type>(n))↵
#define gridv(type, name, m, n, v) vec<vec<type>> name(m, vec<type>(n, v))↵
#define grid3(type, name, p, q, r, val) vec<vec<vec<type>>> name(p, vec<vec<type>>(q, vec<type>(r, val)))↵
#define each(x, a) for (auto &x : a)↵
template <typename T> void print(T v) { cout << v << endl; }↵
template <typename T> void print(vector<T> v) { each(x, v) cout << x << " "; cout << endl; }↵
template <typename T> void print(vector<vector<T>> v) { each (r, v) print(r); }↵
↵
#ifdef LOCAL↵
#define dbg(...) fprintf(stderr, __VA_ARGS__)↵
#else↵
#define dbg(...)↵
#endif↵
↵
/* -- COMMON UTILITIES -- */↵
ll binpow(ll a, ll b, ll m) {↵
a %= m;↵
ll res = 1;↵
while (b > 0) {↵
if (b & 1)↵
res = res * a % m;↵
a = a * a % m;↵
b >>= 1;↵
}↵
return res;↵
}↵
↵
class PrefixSum {↵
vll psum;↵
public:↵
PrefixSum(vll &a) {↵
psum.resize(a.size());↵
partial_sum(all(a), psum.begin());↵
}↵
ll query(ll l, ll r) {↵
if (l > r) return 0;↵
return psum[r] - (l > 0 ? psum[l-1] : 0);↵
}↵
};↵
/* -- END -- */↵
↵
const ll MAXN = 2e5 + 5;↵
ll A[MAXN][4];↵
↵
void solve() {↵
constexpr ll mod = 998244353;↵
↵
memset(A, 0, sizeof A); ↵
↵
ll n; cin >> n;↵
ll k; cin >> k;↵
↵
ll tot = 0;↵
ll step = k;↵
↵
ll CUR = 0;↵
ll DP = 1;↵
ll SAME = 2;↵
ll ANS = 3;↵
↵
A[0][DP] = 1; ↵
↵
ll iters = 0;↵
while ((tot += step) <= n) {↵
iters++;↵
↵
rep (i, n+1) A[i][CUR] = 0;↵
↵
rep (i, step) A[i][SAME] = 0;↵
↵
for (ll i = max<ll>(step-4, 0); i <= n; ++i) { ↵
(A[i][CUR] += A[i % step][SAME]) %= mod;↵
↵
(A[i][ANS] += A[i][CUR]) %= mod; ↵
↵
(A[i % step][SAME] += A[i][DP]) %= mod;↵
} ↵
↵
swap(CUR, DP);↵
step++;↵
}↵
↵
for (ll i = 1; i <= n; ++i) {↵
cout << A[i][ANS] << " ";↵
}↵
↵
cout << endl;↵
↵
// cerr << "iters: " << iters << endl; ↵
↵
}↵
↵
signed main() {↵
#ifndef LOCAL↵
ios_base::sync_with_stdio(false); cin.tie(NULL); cout.tie(NULL);↵
#endif↵
ll t = 1;↵
/* cin >> t; */↵
↵
while (t--) {↵
solve();↵
}↵
↵
return 0;↵
}↵
↵
```↵
↵
</spoiler>↵
↵
What is about this one small line which is a simple O(1) operation which causes the whole program to simply hang? I've been stuck on this weird bug for a while and would like to seek your help.↵