Strange TLE by cin using GNU C++20 (64)...Again?

#	User	Rating
1	tourist	3856
2	jiangly	3747
3	orzdevinwang	3706
4	jqdai0815	3682
5	ksun48	3591
6	gamegame	3477
7	Benq	3468
8	Radewoosh	3462
9	ecnerwala	3451
10	heuristica	3431

#	User	Contrib.
1	cry	168
2	-is-this-fft-	162
3	Dominater069	160
4	Um_nik	159
5	atcoder_official	156
6	djm03178	153
6	adamant	153
8	luogu_official	149
9	awoo	148
10	TheScrasse	146

This time, Auroraa_ is solving 524E - Rooks and Rectangles. After some trial, he got

this code

// Code written by Auroraa_
#include "bits/stdc++.h"

using namespace std;
#define endl '\n'

const int N = 2e5 + 5;

#define ls (p<<1)
#define rs ((p<<1)|1)
#define mid ((l+r)>>1)

struct Seg {
	int l, r;
	int mn;
}seg[4 * N];
void build(int p, int l, int r) {
	seg[p].l = l, seg[p].r = r;
	seg[p].mn = 0;
	if (l == r)return;
	build(ls, l, mid);
	build(rs, mid + 1, r);
}
void upd(int p, int x, int y) {
	int l = seg[p].l, r = seg[p].r;
	if (l == r) {
		seg[p].mn = y;
		return;
	}
	if (x <= mid)upd(ls, x, y);
	else upd(rs, x, y);
	seg[p].mn = min(seg[ls].mn, seg[rs].mn);
}
int ask(int p, int x, int y) {
	int l = seg[p].l, r = seg[p].r;
	if (x <= l && y >= r)return seg[p].mn;
	int ans = 1000000;
	if (x <= mid)ans = min(ans, ask(ls, x, y));
	if (y > mid)ans = min(ans, ask(rs, x, y));
	return ans;
}
int ans[N];
struct query {
	int x1, y1, x2, y2, id;
};
bool cmp_x(query q1, query q2) { return q1.x2 < q2.x2; }
bool cmp_y(query q1, query q2) { return q1.y2 < q2.y2; }
vector<query>vec;
vector<int>v1[N], v2[N];
void solve() {
	int n, m, k, q;
	cin >> n >> m >> k >> q;
	for (int i = 1; i <= k; i++) {
		int x, y;
		cin >> x >> y;
		v1[x].push_back(y);
		v2[y].push_back(x);
	}

	for (int i = 1; i <= q; i++) {
		int x1, y1, x2, y2;
		cin >> x1 >> y1 >> x2 >> y2;
		vec.push_back({ x1, y1, x2, y2, i });
	}

	sort(vec.begin(), vec.end(), cmp_y);


	build(1, 1, n);
	int now = 0;
	for (int i = 1; i <= m; i++) {
		for (auto x : v2[i])upd(1, x, i);
		while (now < q && vec[now].y2 <= i) {
			if (ans[vec[now].id] == 1) {
				now++;
				continue;
			}
			int mn = ask(1, vec[now].x1, vec[now].x2);
			if (vec[now].y1 <= mn)ans[vec[now].id] = 1;
			now++;
		}
		if (now == q)break;
	}

	build(1, 1, m);
	sort(vec.begin(), vec.end(), cmp_x);
	now = 0;

	for (int i = 1; i <= n; i++) {
		for (auto y : v1[i]) {
			upd(1, y, i);
		}
		while (now < q && vec[now].x2 <= i) {
			if (ans[vec[now].id] == 1) {
				now++;
				continue;
			}
			int mn = ask(1, vec[now].y1, vec[now].y2);
			if (vec[now].x1 <= mn)ans[vec[now].id] = 1;
			now++;
		}
		if (now == q)break;
	}

	for (int i = 1; i <= q; i++) {
		if (ans[i] == 0)cout << "NO" << endl;
		else cout << "YES" << endl;
	}

}

signed main() {
	cin.tie(nullptr)->sync_with_stdio(false);
	int T = 1;
	//    cin>>T;
	while (T--) {
		solve();
	}
}

and then received TLE: 260160406

He was surprised, so he tried it again with exact same code, with C++17 (32). You know what? The AC magic happened again: 260161644.

At first glance, we do think some code has triggered undefined behaviours like Access out of bounds. But Auroraa_ denied that, because all the data could be fit in int32_t.

Then came the dramatic scene: by simply changing the code vector<int>v1[N], v2[N]; to vector<long long>v1[N], v2[N];, the strange TLE problem disappeared: 260164936.

After that, my further investigation(260199113) pointed out that the block causes TLE is:

for (int i = 1; i <= q; i++) {
	int x1, y1, x2, y2;
	cin >> x1 >> y1 >> x2 >> y2;
	vec.push_back({ x1, y1, x2, y2, i });
}

It took over 2000ms(260199205) to write data into this vec. And only took about 100ms(260199447) to do the same thing if we just changed vector<int>v1[N], v2[N]; to vector<long long>v1[N], v2[N];. The weirdest thing is, there is no direct connection between v1,v2 and vec.

Recalling the process just described, coincidentally, this modification process is really similar to the blog Strange TLE by cin using GNU C++20 (64), and some weird subtle changes to (surprisingly) fix it.

And like this blog Slowdown bug affecting C++ (64 bit) on Codeforces, this time we encountered a Pandora's Box that just used a different magic number to open. Although MikeMirzayanov has updated the CF system to

Windows Server 2022

#include <iostream>
#include <string>
#include <cinttypes>
#include "Windows.h"
#include "VersionHelpers.h"
#include "cpuid.h"

//https://zhuanlan.zhihu.com/p/28322626
void getCPUID() {
	uint32_t data[4];
	char str[48];
	for (int i = 0; i < 3; ++i) {
		__cpuid_count(0x80000002 + i, 0, data[0], data[1], data[2], data[3]);
		for (int j = 0; j < 4; ++j)
			reinterpret_cast<uint32_t*>(str)[i * 4 + j] = data[j];
	}
	std::cout << str << std::endl;
}

//https://www.cnblogs.com/LyShark/articles/15019661.html
//https://bbs.kanxue.com/thread-252559.htm
void getSysVersion() {
	std::string ret = std::string(IsWindowsServer() ? "Windows Server" : "Windows") + " ";
	typedef void(__stdcall * NTPROC)(DWORD*, DWORD*, DWORD*);
	DWORD dwMajor, dwMinor, dwBuildNumber;
	NTPROC proc = (NTPROC)GetProcAddress(
	                  LoadLibrary(TEXT("ntdll.dll")),
	                  "RtlGetNtVersionNumbers"
	              );
	uintptr_t PEB;
	proc(&dwMajor, &dwMinor, &dwBuildNumber);
#ifdef _WIN64
	PEB = __readgsqword(0x60);
	dwBuildNumber = *((INT*)(PEB + 0x120));
#else
	PEB = __readfsdword(0x30);
	dwBuildNumber = *((INT*)(PEB + 0xAC));
#endif
	if (dwMajor >= 6) {
		ret += std::to_string(dwMajor) + "." + std::to_string(dwMinor) + "." + std::to_string(dwBuildNumber);
	}
	else {
		// win 8
		float f_ret;
		SYSTEM_INFO info;
		GetSystemInfo(&info);
		OSVERSIONINFOEX os;
		os.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
		if (GetVersionEx((OSVERSIONINFO*)&os)) {
			f_ret = os.dwMajorVersion + os.dwMinorVersion * 0.1;
			ret += std::to_string(f_ret);
		}
	}
	std::cout << ret << std::endl;
}

//https://www.cnblogs.com/lidabo/p/7554473.html
void getMemoryInfo() {
	constexpr int kMaxInfoBuffer = 1 << 8;
	constexpr int GBYTES = 1 << 30;
	constexpr int MBYTES = 1 << 20;
	constexpr int KBYTES = 1 << 10;
	constexpr float DKBYTES = KBYTES;
	std::string memory_info;
	MEMORYSTATUSEX statusex;
	statusex.dwLength = sizeof(statusex);
	if (GlobalMemoryStatusEx(&statusex)) {
		uint64_t total = 0, remain_total = 0, avl = 0, remain_avl = 0;
		double decimal_total = 0, decimal_avl = 0;
		remain_total = statusex.ullTotalPhys % GBYTES;
		total = statusex.ullTotalPhys / GBYTES;
		avl = statusex.ullAvailPhys / GBYTES;
		remain_avl = statusex.ullAvailPhys % GBYTES;
		if (remain_total > 0)
			decimal_total = (remain_total / static_cast<double>(MBYTES)) / DKBYTES;
		if (remain_avl > 0)
			decimal_avl = (remain_avl / static_cast<double>(MBYTES)) / DKBYTES;

		decimal_total += (double)total;
		decimal_avl += (double)avl;
		char buffer[kMaxInfoBuffer];
		//sprintf_s(buffer, kMaxInfoBuffer, "total %.2f GB (%.2f GB available)", decimal_total, decimal_avl);
		std::sprintf(buffer, "total %.2f GB (%.2f GB available)", decimal_total, decimal_avl);
		memory_info.append(buffer);
	}
	std::cout << memory_info << std::endl;
}

int main() {
	std::cout << "===OS information===" << std::endl;
	getSysVersion();
	std::cout << "===CPU information===" << std::endl;
	getCPUID();
	std::cout << "===Memory information===" << std::endl;
	getMemoryInfo();
}

, there is still something fishy here.

The new version of GCC and OS passed the "test" on You won't believe how this simple trick defeated the unexplained bug destroying every top LGM; also included other languages such as Rust. The script mentioned there does not work for Rust now (both in custom test and on my Windows 11 computer).

I translate the script to C++:

#include "bits/stdc++.h"

int main() {
    constexpr size_t n = 30000;
    auto vec = std::vector<std::vector<int>>(n);
    for (size_t i = 10000; i < 20000; i++) {
        vec.resize(i);
        for (size_t j = 0; j < i; j++) {
            vec[j].reserve(7);
        }
        auto start = std::chrono::system_clock::now();
        for (size_t j = 0; j < 10000; j++) {
            vec[i - 1].reserve(7);
            vec[i - 1].shrink_to_fit();
        }
        auto elapsed = std::chrono::duration<double, std::milli>(std::chrono::system_clock::now() - start).count();
        if (elapsed > 40) {
            std::cout << i << " " << elapsed << "\n";
        }
    }
}

and also the bomb:

#include "bits/stdc++.h"

int main() {
    constexpr size_t n = 30000;
    constexpr size_t evil = 17182;
    auto vec = std::vector<std::vector<int>>(n);
    vec.resize(evil);
    for (size_t i = 0; i < evil; i++) {
        vec[i].reserve(7);
    }
    for (size_t i = 0; i < size_t(1e6); i++) {
        vec[evil - 1].reserve(7);
        vec[evil - 1].shrink_to_fit();
    }
}

And it generated the output Used: 5514 ms, 0 KB

As usual, I change constexpr size_t evil = 17182; to constexpr size_t evil = 17183;, and then Used: 93 ms, 0 KB

OOPS, MikeMirzayanov! Did you forget to upgrade compilers after migrating the CF system to Windows Server 2022? If that's true, please, upgrade those compilers to UCRT version, and I suggest you could simply use the one with LLVM for the simplest configuration for clang18 with GCC's libstdc++.

Rev.	By	When	Δ	Comment
en10	tatianyi	2024-05-17 19:16:15	225	Tiny change: Tiny difference between msvc and mingw64-gnu
en9	tatianyi	2024-05-17 15:23:08	100	Tiny change: add Clang in the installation command.
en8	tatianyi	2024-05-17 14:43:27	244	UPUPD change: msvc compiler also not suitable for this
en7	tatianyi	2024-05-17 14:16:28	1209	Add a combined version that automatically finds the bomb.
en6	tatianyi	2024-05-16 02:26:26	192	Tiny change: Headers for easy cross-platform.
en5	tatianyi	2024-05-13 15:06:03	38	Tiny change.
en4	tatianyi	2024-05-10 20:14:32	3365	Further investigation for MinGWs and MSYS2.
en3	tatianyi	2024-05-10 10:38:35	164	(published)
en2	tatianyi	2024-05-10 10:34:09	6695	Change: 'vector bomb'
en1	tatianyi	2024-05-10 09:30:16	4047	Initial revision (saved to drafts)

History