Submission #420573
Source Code Expand
# Run the compiled solver. The BEGIN block below executes at compile time,
# so $exe_name is already set by the time this statement runs.
system("./$exe_name");
BEGIN {
# Pick the platform-specific file name of the compiled solver binary.
$exe_name = $^O eq 'MSWin32' ? 'a.exe' : 'a.out';
# Skip the build entirely when a binary from an earlier run already exists.
return if -e $exe_name;
# Write the embedded C++ program to tmp.cpp; fail loudly if the file cannot
# be created instead of silently printing to an unopened handle.
open my $fh, '>', 'tmp.cpp' or die "cannot open tmp.cpp for writing: $!";
print $fh <<'CODE';
#line 8
#include <string>
#include <vector>
#include <algorithm>
#include <numeric>
#include <set>
#include <map>
#include <queue>
#include <iostream>
#include <sstream>
#include <cstdio>
#include <cmath>
#include <ctime>
#include <cstring>
#include <cctype>
#include <limits>
#include <functional>
#include <cstdint>
#ifdef NDEBUG
#undef NDEBUG
#endif
#include <cassert>
#include <nmmintrin.h>
#if defined(_MSC_VER)
#include <intrin.h>
#endif
#ifdef _DEBUG
#undef assert
#include "C:\Dropbox\backup\implements\Util\MyAssert.hpp"
#define assert my_assert
#else
#undef assert
#define assert(x)
#endif
// Loop helpers. Each one declares its own int loop variable named by the first argument.
// rep: i runs over 0 .. n-1.
#define rep(i,n) for(int (i)=0;(i)<(int)(n);++(i))
// rer: i runs over l .. u, both ends inclusive.
#define rer(i,l,u) for(int (i)=(int)(l);(i)<=(int)(u);++(i))
// reu: i runs over l .. u-1 (upper bound exclusive).
#define reu(i,l,u) for(int (i)=(int)(l);(i)<(int)(u);++(i))
// aut(r,v): declare r with the type of v; uses `auto` on MSVC or when __cplusplus
// is newer than C++98, and the __typeof extension otherwise.
#if defined(_MSC_VER) || __cplusplus > 199711L
#define aut(r,v) auto r = (v)
#else
#define aut(r,v) __typeof(v) r = (v)
#endif
// each: iterate `it` from (o).begin() to (o).end().
#define each(it,o) for(aut(it, (o).begin()); it != (o).end(); ++ it)
// all: expands to the begin/end iterator pair of o (two comma-separated arguments).
#define all(o) (o).begin(), (o).end()
// Shorthands for push_back and make_pair.
#define pb(x) push_back(x)
#define mp(x,y) make_pair((x),(y))
// mset: memset over sizeof(m) bytes, so m must be an actual array rather than a pointer.
#define mset(m,v) memset(m,v,sizeof(m))
// Large sentinel constants for int and long long respectively.
#define INF 0x3f3f3f3f
#define INFL 0x3f3f3f3f3f3f3f3fLL
using namespace std;
// Short aliases for commonly used container and integer types.
typedef vector<int> vi; typedef pair<int,int> pii; typedef vector<pair<int,int> > vpii; typedef long long ll;
// Lowers x to y when y compares less than x; otherwise x is left untouched.
template<typename T, typename U>
inline void amin(T &x, U y) {
	if(!(y < x)) return;
	x = y;
}
// Raises x to y when x compares less than y; otherwise x is left untouched.
template<typename T, typename U>
inline void amax(T &x, U y) {
	if(!(x < y)) return;
	x = y;
}
#ifdef _MSC_VER
#define alignas(x) __declspec(align(x))
#endif
// Generic, purely static array helpers for an element type R_ that supports
// default construction, assignment, unary minus, +=, -=, *=, binary -, * and ==.
// All routines work on raw pointers plus an explicit element count.
template<typename R_>
struct IntOpDefault {
	typedef R_ R;

	// res[k] = p[k] for k in [0, n).
	static void copy(R *res, const R *p, int n) {
		int k = 0;
		while(k < n) {
			res[k] = p[k];
			++ k;
		}
	}

	// Overwrites the first n elements of p with a value-initialized R.
	static void fill_zero(R *p, int n) {
		int k = 0;
		while(k < n) {
			p[k] = R();
			++ k;
		}
	}

	// res[k] = -p[k] for k in [0, n).
	static void negate_all(R *res, const R *p, int n) {
		int k = 0;
		while(k < n) {
			res[k] = -p[k];
			++ k;
		}
	}

	// Quadratic-time product of the polynomials p (pn coefficients) and q (qn coefficients).
	// The first pn + qn - 1 elements of res are cleared and then accumulated into.
	static void convolute_schoolbook(R *res, const R *p, int pn, const R *q, int qn) {
		fill_zero(res, pn + qn - 1);
		for(int a = 0; a < pn; ++ a) {
			const R &coef = p[a];
			for(int b = 0; b < qn; ++ b)
				res[a + b] += coef * q[b];
		}
	}

	// res[k] += p[k] for k in [0, n).
	static void add(R *res, const R *p, int n) {
		int k = 0;
		while(k < n) {
			res[k] += p[k];
			++ k;
		}
	}

	// res[k] -= p[k] for k in [0, n).
	static void subtract(R *res, const R *p, int n) {
		int k = 0;
		while(k < n) {
			res[k] -= p[k];
			++ k;
		}
	}

	// Multiplicative inverse of x, found by repeating i <- i * (2 - i * x) starting from i = x.
	// NOTE: the loop only stops once i * x equals R(1), so the caller must pass a value
	// for which that eventually happens (for wrapping unsigned integers, presumably odd x).
	static R inverse(R x) {
		R inv = x, prod, TWO = R(2), ONE = R(1);
		for(;;) {
			prod = inv * x;
			inv *= TWO - prod;
			if(prod == ONE) break;
		}
		return inv;
	}

	// p[k] *= scalar for k in [0, n).
	static void multiply_scalar(R *p, int n, R scalar) {
		int k = 0;
		while(k < n) {
			p[k] *= scalar;
			++ k;
		}
	}
};
// Value wrapper around one __m128i register treated as four 32-bit lanes.
// Arithmetic operators act lane-wise; the slli/srli helpers shift the whole
// register by a number of BYTES, so a shift of 4 moves the contents by one lane.
struct u32x4 {
	__m128i v;

	// Default construction yields an all-zero register.
	u32x4(): v(_mm_setzero_si128()) { }
	u32x4(const __m128i &v_): v(v_) { }

	// Broadcasts x into all four lanes.
	static u32x4 set1(uint32_t x) {
		const __m128i broadcast = _mm_set1_epi32(x);
		return u32x4(broadcast);
	}

	// Unaligned 16-byte load starting at p.
	template<typename T> static u32x4 loadu(const T *p) {
		const __m128i *src = reinterpret_cast<const __m128i*>(p);
		return u32x4(_mm_loadu_si128(src));
	}

	// Unaligned 16-byte store to p.
	template<typename T> void storeu(T *p) const {
		__m128i *dst = reinterpret_cast<__m128i*>(p);
		_mm_storeu_si128(dst, v);
	}

	// Lane-wise product, keeping the low 32 bits of each lane.
	u32x4 operator*(const u32x4 &that) const {
		const __m128i prod = _mm_mullo_epi32(v, that.v);
		return u32x4(prod);
	}

	// Lane-wise wrapping sum.
	u32x4 operator+(const u32x4 &that) const {
		const __m128i total = _mm_add_epi32(v, that.v);
		return u32x4(total);
	}

	// Lane-wise wrapping difference.
	u32x4 operator-(const u32x4 &that) const {
		const __m128i delta = _mm_sub_epi32(v, that.v);
		return u32x4(delta);
	}

	u32x4 &operator+=(const u32x4 &that) {
		v = _mm_add_epi32(v, that.v);
		return *this;
	}

	// Whole-register byte shift to the left by s bytes (zero filled).
	template<int s> u32x4 slli() const {
		const __m128i shifted = _mm_slli_si128(v, s);
		return u32x4(shifted);
	}
	u32x4 slli4() const { return u32x4(_mm_slli_si128(v, 4)); }
	u32x4 slli8() const { return u32x4(_mm_slli_si128(v, 8)); }
	u32x4 slli12() const { return u32x4(_mm_slli_si128(v, 12)); }

	// Whole-register byte shift to the right by s bytes (zero filled).
	template<int s> u32x4 srli() const {
		const __m128i shifted = _mm_srli_si128(v, s);
		return u32x4(shifted);
	}
	u32x4 srli4() const { return u32x4(_mm_srli_si128(v, 4)); }
	u32x4 srli8() const { return u32x4(_mm_srli_si128(v, 8)); }
	u32x4 srli12() const { return u32x4(_mm_srli_si128(v, 12)); }
};
template<int PN_of_4, int QN_of_4>
inline void add_template(uint32_t *res, const uint32_t *p, const uint32_t *q) {
static_assert(PN_of_4 >= QN_of_4, "PN_of_4 >= QN_of_4");
for(int i = 0; i < QN_of_4; ++ i) {
u32x4 sum = u32x4::loadu(p + i * 4) + u32x4::loadu(q + i * 4);
sum.storeu(res + i * 4);
}
for(int i = QN_of_4 * 4; i < PN_of_4 * 4; ++ i)
res[i] = p[i];
}
template<int N_of_4>
inline void add_template(uint32_t *p, const uint32_t *q) {
for(int i = 0; i < N_of_4; ++ i) {
u32x4 sum = u32x4::loadu(p + i * 4) + u32x4::loadu(q + i * 4);
sum.storeu(p + i * 4);
}
}
template<int PN_of_4, int QN_of_4>
inline void subtract_template(uint32_t *res, const uint32_t *p, const uint32_t *q) {
static_assert(PN_of_4 >= QN_of_4, "PN_of_4 >= QN_of_4");
for(int i = 0; i < QN_of_4; ++ i) {
u32x4 diff = u32x4::loadu(p + i * 4) - u32x4::loadu(q + i * 4);
diff.storeu(res + i * 4);
}
for(int i = QN_of_4 * 4; i < PN_of_4 * 4; ++ i)
res[i] = p[i];
}
template<int N_of_4>
inline void subtract_template(uint32_t *p, const uint32_t *q) {
for(int i = 0; i < N_of_4; ++ i) {
u32x4 diff = u32x4::loadu(p + i * 4) - u32x4::loadu(q + i * 4);
diff.storeu(p + i * 4);
}
}
// A 32-bit unsigned value whose arithmetic wraps (uint32_t semantics), together with
// static SIMD polynomial-multiplication routines that operate on raw uint32_t arrays.
// Sizes of the array routines are template parameters counted in blocks of 4 elements.
struct IntOp32 : IntOpDefault<IntOp32> {
uint32_t x;
IntOp32(): x(0) { }
explicit IntOp32(uint32_t x_): x(x_) { }
IntOp32 &operator+=(const IntOp32 &that) { x += that.x; return *this; }
IntOp32 &operator-=(const IntOp32 &that) { x -= that.x; return *this; }
IntOp32 &operator*=(const IntOp32 &that) { x *= that.x; return *this; }
IntOp32 operator+(const IntOp32 &that) const { return IntOp32(x + that.x); }
IntOp32 operator-(const IntOp32 &that) const { return IntOp32(x - that.x); }
IntOp32 operator*(const IntOp32 &that) const { return IntOp32(x * that.x); }
// Two's-complement negation written as ~x + 1.
IntOp32 operator-() const { return IntOp32(~x + 1); }
bool operator==(const IntOp32 &that) const { return x == that.x; }
// Schoolbook product of p (PN_of_4 * 4 coefficients) and q (QN_of_4 * 4 coefficients).
// Writes (PN_of_4 + QN_of_4) * 4 elements into res.
template<int PN_of_4, int QN_of_4>
static void convolute_schoolbook_template(uint32_t *res, const uint32_t *p, const uint32_t *q) {
// Accumulators, one vector per output block; default-constructed to zero.
u32x4 sum[PN_of_4 + QN_of_4];
for(int i = 0; i < PN_of_4; ++ i) {
// Broadcast each of the four p coefficients of block i.
u32x4 x0 = u32x4::set1(p[i * 4 + 0]);
u32x4 x1 = u32x4::set1(p[i * 4 + 1]);
u32x4 x2 = u32x4::set1(p[i * 4 + 2]);
u32x4 x3 = u32x4::set1(p[i * 4 + 3]);
for(int j = 0; j < QN_of_4; ++ j) {
u32x4 y = u32x4::loadu(q + j * 4);
// zk = p[i*4 + k] * (four q coefficients); it belongs k lanes higher in the output.
u32x4 z0 = x0 * y;
u32x4 z1 = x1 * y;
u32x4 z2 = x2 * y;
u32x4 z3 = x3 * y;
// Lanes that stay inside output block i + j after shifting zk up by k lanes.
sum[i + j + 0] += (z0 + z1.slli4()) + (z2.slli8() + z3.slli12());
// Lanes shifted out of the top of that block spill into block i + j + 1.
sum[i + j + 1] += (z1.srli8() + z2.srli4() + z3).srli4();
}
}
for(int i = 0; i < PN_of_4 + QN_of_4; ++ i)
sum[i].storeu(res + i * 4);
}
// Karatsuba splitting is used only when both operands have at least this many blocks.
enum { KARATSUBA_THRESHOLD_OF_4 = 4 };
#define ENABLE_KARATSUBA(PNo4, QNo4) \
((PNo4) >= KARATSUBA_THRESHOLD_OF_4 && (QNo4) >= KARATSUBA_THRESHOLD_OF_4)
// Karatsuba step: res = p * q with p of PNo4 blocks and q of QNo4 blocks.
// Writes (PNo4 + QNo4) * 4 elements into res.
template<int PNo4, int QNo4>
static typename enable_if<ENABLE_KARATSUBA(PNo4,QNo4)>::type convolute_template(uint32_t *res, const uint32_t *p, const uint32_t *q) {
// LOo4: blocks in the low halves; HPo4 / HQo4: blocks in the high halves of p / q.
enum { LOo4 = (PNo4 + 1) / 2, HPo4 = PNo4 - LOo4, HQo4 = QNo4 - LOo4 };
static_assert(0 < LOo4 && 0 < HQo4 && HPo4 <= LOo4 && HQo4 <= LOo4, "parameters");
// Local scratch: t0 = p_lo + p_hi, t1 = q_lo + q_hi, r1 = middle product.
uint32_t t0[LOo4 * 4], t1[LOo4 * 4], r1[LOo4 * 4 * 2];
// r0 (low * low) and rinf (high * high) are computed directly into their final place in res.
uint32_t * const r0 = res, * const rinf = res + LOo4 * 4 * 2;
add_template<LOo4, HPo4>(t0, p, p + LOo4 * 4);
add_template<LOo4, HQo4>(t1, q, q + LOo4 * 4);
convolute_template<LOo4, LOo4>(r1, t0, t1);
convolute_template<LOo4, LOo4>(r0, p, q);
convolute_template<HPo4, HQo4>(rinf, p + LOo4 * 4, q + LOo4 * 4);
// r1 = (p_lo + p_hi)(q_lo + q_hi) - r0 - rinf, i.e. the cross terms.
subtract_template<LOo4 * 2>(r1, r0);
subtract_template<HPo4 + HQo4>(r1, rinf);
// Add the cross terms at an offset of LOo4 blocks.
add_template<LOo4 * 2>(res + LOo4 * 4, r1);
}
// Below the threshold: fall back to the schoolbook routine.
template<int PNo4, int QNo4>
static typename enable_if<!ENABLE_KARATSUBA(PNo4,QNo4)>::type convolute_template(uint32_t *res, const uint32_t *p, const uint32_t *q) {
return convolute_schoolbook_template<PNo4,QNo4>(res, p, q);
}
#undef ENABLE_KARATSUBA
};
// Capacity, in 32-bit coefficients, of each input buffer; the product buffer is twice as large.
enum { MaxN = 100004 };
// Request 16-byte alignment for the SIMD buffers. GCC-compatible compilers get the
// __attribute__ spelling because the alignas keyword is only available from GCC 4.8,
// and this program is built with whatever g++ the host happens to provide. Other
// compilers keep alignas. A leading attribute applies to every declarator that follows.
#if defined(__GNUC__)
__attribute__((aligned(16)))
#else
alignas(16)
#endif
uint32_t p[MaxN], q[MaxN], res[MaxN*2];
// Reads N followed by N pairs (A, B), stores them as coefficients 1..N of the global
// polynomials p and q (coefficient 0 stays zero), multiplies the two polynomials and
// prints coefficients 1..2N of the product, one per line.
// Returns 1 on malformed input or when N does not fit the fixed-size buffers.
int main() {
	int N;
	if(scanf("%d", &N) != 1)
		return 1;
	// p[N] and q[N] are written below, so N must stay inside the MaxN-element buffers.
	if(N > MaxN - 1)
		return 1;
	for(int i = 1; i <= N; ++ i) {
		int A, B;
		if(scanf("%d%d", &A, &B) != 2)
			return 1;
		p[i] = A, q[i] = B;
	}
	// The multiplication size is fixed at compile time to the full buffer capacity.
	IntOp32::convolute_template<MaxN/4,MaxN/4>(res, p, q);
	for(int i = 1; i <= N + N; ++ i) {
		int ans = res[i];
		printf("%d\n", ans);
	}
	return 0;
}
CODE
# Make sure the generated source is completely on disk before the compiler reads it.
close $fh or die "cannot finish writing tmp.cpp: $!";
# -std=c++0x is the spelling understood by g++ releases that predate -std=c++11
# (which they reject as an unrecognized option); newer releases still accept it.
system "g++ -O2 -lm -mavx -std=c++0x tmp.cpp -o $exe_name 2> my_compile.log";
if ($? != 0) {
    # Relay the compiler diagnostics to STDERR before aborting.
    if (open(my $log, '<', 'my_compile.log')) {
        while (my $line = <$log>) { print STDERR $line; }
        close $log;
    }
    die 'compile error';
}
}
Submission Info
Submission Time
2015-06-06 21:33:09+0900
Task
C - 高速フーリエ変換
User
anta
Language
Perl (5.14.2)
Score
0
Code Size
9579 Byte
Status
RE
Exec Time
34 ms
Memory
1116 KB
Compile Error
cc1plus: error: unrecognized command line option ‘-std=c++11’
compile error at ./Main.pl line 299, <$fh> line 1.
BEGIN failed--compilation aborted at ./Main.pl line 301.
Judge Result
Set Name
Sample
All
Score / Max Score
0 / 0
0 / 100
Status
Set Name
Test Cases
Sample
00_sample_01
All
00_sample_01, 01_00_01, 01_01_19, 01_02_31, 01_03_22, 01_04_31, 01_05_40, 01_06_15, 01_07_39, 01_08_28, 01_09_30, 01_10_23, 01_11_33, 01_12_11, 01_13_28, 01_14_41, 01_15_26, 01_16_49, 01_17_34, 01_18_02, 01_19_33, 01_20_29, 02_00_51254, 02_01_82431, 02_02_17056, 02_03_34866, 02_04_6779, 02_05_65534, 02_06_65535, 02_07_65536, 02_08_65537, 02_09_65538, 02_10_100000
Case Name
Status
Exec Time
Memory
00_sample_01
RE
30 ms
1048 KB
01_00_01
RE
34 ms
1108 KB
01_01_19
RE
33 ms
1108 KB
01_02_31
RE
30 ms
1112 KB
01_03_22
RE
30 ms
1052 KB
01_04_31
RE
29 ms
1092 KB
01_05_40
RE
29 ms
1056 KB
01_06_15
RE
29 ms
1056 KB
01_07_39
RE
30 ms
1092 KB
01_08_28
RE
30 ms
1112 KB
01_09_30
RE
29 ms
1060 KB
01_10_23
RE
29 ms
1104 KB
01_11_33
RE
29 ms
1060 KB
01_12_11
RE
29 ms
1064 KB
01_13_28
RE
29 ms
1068 KB
01_14_41
RE
29 ms
1112 KB
01_15_26
RE
29 ms
1052 KB
01_16_49
RE
28 ms
1116 KB
01_17_34
RE
29 ms
1060 KB
01_18_02
RE
29 ms
1100 KB
01_19_33
RE
29 ms
1112 KB
01_20_29
RE
29 ms
1064 KB
02_00_51254
RE
30 ms
1056 KB
02_01_82431
RE
29 ms
1068 KB
02_02_17056
RE
30 ms
1108 KB
02_03_34866
RE
28 ms
1104 KB
02_04_6779
RE
30 ms
1108 KB
02_05_65534
RE
28 ms
1100 KB
02_06_65535
RE
29 ms
1064 KB
02_07_65536
RE
30 ms
1104 KB
02_08_65537
RE
29 ms
1112 KB
02_09_65538
RE
30 ms
1100 KB
02_10_100000
RE
30 ms
1076 KB