Submission #420573


Source Code Expand

# Run the binary produced by the BEGIN block below (BEGIN executes at compile
# time, so $exe_name is already set and the executable already exists here).
# The child inherits our stdin/stdout.  Its failure is passed on as our own
# exit status so that a crash in the binary is not reported as success.
my $run_status = system './' . $exe_name;
if ($run_status != 0) {
    # -1 means the program could not be started; a non-zero low 7 bits means
    # it was killed by a signal; otherwise the high byte is its exit code.
    my $exit_code = ($run_status == -1 || ($run_status & 127)) ? 1 : ($run_status >> 8);
    exit $exit_code;
}

BEGIN {
    # Name of the compiled helper binary; Windows needs the .exe suffix.
    # This is deliberately a package variable: the run-time code outside this
    # BEGIN block reads it as well.
    $exe_name = $^O eq 'MSWin32' ? 'a.exe' : 'a.out';
    # A binary left over from an earlier run is reused instead of recompiling.
    return if -e $exe_name;
    # Write the embedded C++ program (the heredoc that follows) to tmp.cpp,
    # failing loudly if the file cannot be created.
    open my $fh, '>', 'tmp.cpp' or die "cannot open tmp.cpp for writing: $!";
    print $fh <<'CODE';
#line 8
#include <string>
#include <vector>
#include <algorithm>
#include <numeric>
#include <set>
#include <map>
#include <queue>
#include <iostream>
#include <sstream>
#include <cstdio>
#include <cmath>
#include <ctime>
#include <cstring>
#include <cctype>
#include <limits>
#include <functional>

#include <cstdint>

#ifdef NDEBUG
#undef NDEBUG
#endif
#include <cassert>

#include <nmmintrin.h>
#if defined(_MSC_VER)
#include <intrin.h>
#endif

#ifdef _DEBUG
#undef assert
#include "C:\Dropbox\backup\implements\Util\MyAssert.hpp"
#define assert my_assert
#else
#undef assert
#define assert(x) 
#endif

// Loop shorthands. rep: i runs over 0 .. n-1. rer: i runs over l .. u
// inclusive. reu: i runs over l .. u-1. The bounds are cast to int.
#define rep(i,n) for(int (i)=0;(i)<(int)(n);++(i))
#define rer(i,l,u) for(int (i)=(int)(l);(i)<=(int)(u);++(i))
#define reu(i,l,u) for(int (i)=(int)(l);(i)<(int)(u);++(i))
// aut(r,v) declares r initialised from v with v's type: via `auto` on MSVC or
// when __cplusplus is newer than C++98, via the __typeof extension otherwise.
#if defined(_MSC_VER) || __cplusplus > 199711L
#define aut(r,v) auto r = (v)
#else
#define aut(r,v) __typeof(v) r = (v)
#endif
// each(it,o): iterate `it` from o.begin() to o.end().
#define each(it,o) for(aut(it, (o).begin()); it != (o).end(); ++ it)
// all(o): expands to the begin/end iterator pair of o.
#define all(o) (o).begin(), (o).end()
// Abbreviations for push_back, make_pair and a whole-array memset
// (mset uses sizeof(m), so m must be an array, not a pointer).
#define pb(x) push_back(x)
#define mp(x,y) make_pair((x),(y))
#define mset(m,v) memset(m,v,sizeof(m))
// Large constants whose every byte is 0x3f (32-bit and 64-bit variants).
#define INF 0x3f3f3f3f
#define INFL 0x3f3f3f3f3f3f3f3fLL
using namespace std;
// Short aliases for common container and integer types.
typedef vector<int> vi; typedef pair<int,int> pii; typedef vector<pair<int,int> > vpii; typedef long long ll;
// Lowers x to y when y compares less than x; otherwise x is left untouched.
template<typename T, typename U>
inline void amin(T &x, U y) {
    if(y < x) {
        x = y;
    }
}
// Raises x to y when x compares less than y; otherwise x is left untouched.
template<typename T, typename U>
inline void amax(T &x, U y) {
    if(x < y) {
        x = y;
    }
}

// Compatibility shim for the C++11 `alignas` specifier on compilers that do
// not provide it natively: MSVC gets __declspec(align), and GCC releases
// older than 4.8 (which lack alignas even in C++0x mode) get the equivalent
// GNU attribute. Every other compiler uses the built-in keyword.
#if defined(_MSC_VER)
#define alignas(x) __declspec(align(x))
#elif defined(__GNUC__) && !defined(__clang__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8))
#define alignas(x) __attribute__((aligned(x)))
#endif

// Generic element-wise and polynomial helpers over a ring-like element type
// R_. R_ needs default construction, construction from the integers 1 and 2,
// copy assignment, unary minus, binary - and *, the compound operators
// += -= *=, and operator==.
template<typename R_>
struct IntOpDefault {
    typedef R_ R;

    // res[k] = p[k] for k in [0, n).
    static void copy(R *res, const R *p, int n) {
        int k = 0;
        while(k < n) {
            res[k] = p[k];
            ++ k;
        }
    }

    // Resets p[0 .. n) to a default-constructed R.
    static void fill_zero(R *p, int n) {
        int k = 0;
        while(k < n) {
            p[k] = R();
            ++ k;
        }
    }

    // res[k] = -p[k] for k in [0, n).
    static void negate_all(R *res, const R *p, int n) {
        int k = 0;
        while(k < n) {
            res[k] = -p[k];
            ++ k;
        }
    }

    // Quadratic-time polynomial product: clears res[0 .. pn+qn-1) and then
    // accumulates p[a] * q[b] into res[a + b] for every pair (a, b).
    static void convolute_schoolbook(R *res, const R *p, int pn, const R *q, int qn) {
        fill_zero(res, pn + qn - 1);
        for(int a = 0; a < pn; ++ a) {
            R *row = res + a;
            for(int b = 0; b < qn; ++ b) {
                row[b] += p[a] * q[b];
            }
        }
    }

    // res[k] += p[k] for k in [0, n).
    static void add(R *res, const R *p, int n) {
        int k = 0;
        while(k < n) {
            res[k] += p[k];
            ++ k;
        }
    }

    // res[k] -= p[k] for k in [0, n).
    static void subtract(R *res, const R *p, int n) {
        int k = 0;
        while(k < n) {
            res[k] -= p[k];
            ++ k;
        }
    }

    // Multiplicative inverse of x by repeated refinement inv *= 2 - inv * x,
    // starting from inv = x. The loop ends once the product inv * x computed
    // at the top of a round equals 1; it does not terminate if that never
    // happens.
    static R inverse(R x) {
        const R two = R(2), one = R(1);
        R inv = x;
        for(;;) {
            R prod = inv * x;
            inv *= two - prod;
            if(prod == one)
                break;
        }
        return inv;
    }

    // p[k] *= scalar for k in [0, n).
    static void multiply_scalar(R *p, int n, R scalar) {
        int k = 0;
        while(k < n) {
            p[k] *= scalar;
            ++ k;
        }
    }
};

struct u32x4 {
    __m128i v;
    u32x4(): v(_mm_setzero_si128()) { }
    u32x4(const __m128i &v_): v(v_) { }

    static u32x4 set1(uint32_t x) {
        return u32x4(_mm_set1_epi32(x));
    }
    template<typename T> static u32x4 loadu(const T *p) {
        return u32x4(_mm_loadu_si128(reinterpret_cast<const __m128i*>(p)));
    }
    template<typename T> void storeu(T *p) const {
        _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v);
    }

    u32x4 operator*(const u32x4 &that) const {
        return u32x4(_mm_mullo_epi32(v, that.v));
    }
    u32x4 operator+(const u32x4 &that) const {
        return u32x4(_mm_add_epi32(v, that.v));
    }
    u32x4 operator-(const u32x4 &that) const {
        return u32x4(_mm_sub_epi32(v, that.v));
    }
    u32x4 &operator+=(const u32x4 &that) {
        return *this = *this + that;
    }

    template<int s> u32x4 slli() const {
        return u32x4(_mm_slli_si128(v, s));
    }
    u32x4 slli4() const { return slli<4>(); }
    u32x4 slli8() const { return slli<8>(); }
    u32x4 slli12() const { return slli<12>(); }

    template<int s> u32x4 srli() const {
        return u32x4(_mm_srli_si128(v, s));
    }

    u32x4 srli4() const { return srli<4>(); }
    u32x4 srli8() const { return srli<8>(); }
    u32x4 srli12() const { return srli<12>(); }
};

template<int PN_of_4, int QN_of_4>
inline void add_template(uint32_t *res, const uint32_t *p, const uint32_t *q) {
    static_assert(PN_of_4 >= QN_of_4, "PN_of_4 >= QN_of_4");
    for(int i = 0; i < QN_of_4; ++ i) {
        u32x4 sum = u32x4::loadu(p + i * 4) + u32x4::loadu(q + i * 4);
        sum.storeu(res + i * 4);
    }
    for(int i = QN_of_4 * 4; i < PN_of_4 * 4; ++ i)
        res[i] = p[i];
}
template<int N_of_4>
inline void add_template(uint32_t *p, const uint32_t *q) {
    for(int i = 0; i < N_of_4; ++ i) {
        u32x4 sum = u32x4::loadu(p + i * 4) + u32x4::loadu(q + i * 4);
        sum.storeu(p + i * 4);
    }
}

template<int PN_of_4, int QN_of_4>
inline void subtract_template(uint32_t *res, const uint32_t *p, const uint32_t *q) {
    static_assert(PN_of_4 >= QN_of_4, "PN_of_4 >= QN_of_4");
    for(int i = 0; i < QN_of_4; ++ i) {
        u32x4 diff = u32x4::loadu(p + i * 4) - u32x4::loadu(q + i * 4);
        diff.storeu(res + i * 4);
    }
    for(int i = QN_of_4 * 4; i < PN_of_4 * 4; ++ i)
        res[i] = p[i];
}
template<int N_of_4>
inline void subtract_template(uint32_t *p, const uint32_t *q) {
    for(int i = 0; i < N_of_4; ++ i) {
        u32x4 diff = u32x4::loadu(p + i * 4) - u32x4::loadu(q + i * 4);
        diff.storeu(p + i * 4);
    }
}

// 32-bit unsigned integer element type (arithmetic wraps modulo 2^32, as all
// operations are plain uint32_t operations) plus SIMD polynomial
// multiplication routines whose sizes are fixed at compile time. Sizes are
// given in "blocks of 4" coefficients: a template argument of K means 4*K
// uint32_t values. Inherits the generic helpers of IntOpDefault with itself
// as the element type.
struct IntOp32 : IntOpDefault<IntOp32> {
    uint32_t x;
    IntOp32(): x(0) { }
    explicit IntOp32(uint32_t x_): x(x_) { }

    // Compound and binary arithmetic on the wrapped value.
    IntOp32 &operator+=(const IntOp32 &that) { x += that.x; return *this; }
    IntOp32 &operator-=(const IntOp32 &that) { x -= that.x; return *this; }
    IntOp32 &operator*=(const IntOp32 &that) { x *= that.x; return *this; }

    IntOp32 operator+(const IntOp32 &that) const { return IntOp32(x + that.x); }
    IntOp32 operator-(const IntOp32 &that) const { return IntOp32(x - that.x); }
    IntOp32 operator*(const IntOp32 &that) const { return IntOp32(x * that.x); }
    // Negation written as two's complement: ~x + 1.
    IntOp32 operator-() const { return IntOp32(~x + 1); }

    bool operator==(const IntOp32 &that) const { return x == that.x; }

    // Quadratic-time product of p (PN_of_4 blocks) and q (QN_of_4 blocks).
    // Writes (PN_of_4 + QN_of_4) * 4 elements to res.
    template<int PN_of_4, int QN_of_4>
    static void convolute_schoolbook_template(uint32_t *res, const uint32_t *p, const uint32_t *q) {
        // One accumulator register per output block; default-constructed to zero.
        u32x4 sum[PN_of_4 + QN_of_4];

        for(int i = 0; i < PN_of_4; ++ i) {
            // Broadcast each of the four coefficients of p's block i.
            u32x4 x0 = u32x4::set1(p[i * 4 + 0]);
            u32x4 x1 = u32x4::set1(p[i * 4 + 1]);
            u32x4 x2 = u32x4::set1(p[i * 4 + 2]);
            u32x4 x3 = u32x4::set1(p[i * 4 + 3]);

            for(int j = 0; j < QN_of_4; ++ j) {
                u32x4 y = u32x4::loadu(q + j * 4);
                // zk holds p[4i+k] * q[4j .. 4j+3]; it belongs at output
                // offsets k .. k+3 relative to output block i+j.
                u32x4 z0 = x0 * y;
                u32x4 z1 = x1 * y;
                u32x4 z2 = x2 * y;
                u32x4 z3 = x3 * y;

                // Lanes that stay inside block i+j: zk moved up by k lanes.
                sum[i + j + 0] += (z0 + z1.slli4()) + (z2.slli8() + z3.slli12());
                // Lanes that spill into block i+j+1: the top k lanes of zk,
                // lined up and moved down to start at lane 0.
                sum[i + j + 1] += (z1.srli8() + z2.srli4() + z3).srli4();
            }
        }

        for(int i = 0; i < PN_of_4 + QN_of_4; ++ i)
            sum[i].storeu(res + i * 4);
    }

    // Minimum operand size, in blocks of 4, for the Karatsuba overload.
    enum { KARATSUBA_THRESHOLD_OF_4 = 4 };

// True when both operands are large enough for the Karatsuba overload.
#define ENABLE_KARATSUBA(PNo4, QNo4) \
    ((PNo4) >= KARATSUBA_THRESHOLD_OF_4 && (QNo4) >= KARATSUBA_THRESHOLD_OF_4)

    // Karatsuba product of p (PNo4 blocks) and q (QNo4 blocks), selected when
    // both sizes reach the threshold. p is split into a low part of LOo4
    // blocks and a high part of HPo4 blocks; q is split at the same LOo4 into
    // a low part and a high part of HQo4 blocks. The two temporaries t0/t1
    // and the middle product r1 live on the stack.
    template<int PNo4, int QNo4>
    static typename enable_if<ENABLE_KARATSUBA(PNo4,QNo4)>::type convolute_template(uint32_t *res, const uint32_t *p, const uint32_t *q) {
        enum { LOo4 = (PNo4 + 1) / 2, HPo4 = PNo4 - LOo4, HQo4 = QNo4 - LOo4 };
        static_assert(0 < LOo4 && 0 < HQo4 && HPo4 <= LOo4 && HQo4 <= LOo4, "parameters");
        uint32_t t0[LOo4 * 4], t1[LOo4 * 4], r1[LOo4 * 4 * 2];
        // The low*low and high*high products are written straight into res:
        // r0 at the start, rinf 2*LOo4 blocks further in.
        uint32_t * const r0 = res, * const rinf = res + LOo4 * 4 * 2;
        // t0 = p_low + p_high, t1 = q_low + q_high.
        add_template<LOo4, HPo4>(t0, p, p + LOo4 * 4);
        add_template<LOo4, HQo4>(t1, q, q + LOo4 * 4);
        // r1 = t0 * t1, r0 = p_low * q_low, rinf = p_high * q_high.
        convolute_template<LOo4, LOo4>(r1, t0, t1);
        convolute_template<LOo4, LOo4>(r0, p, q);
        convolute_template<HPo4, HQo4>(rinf, p + LOo4 * 4, q + LOo4 * 4);
        // r1 -= r0 + rinf leaves the cross terms, which are then added into
        // res at an offset of LOo4 blocks.
        subtract_template<LOo4 * 2>(r1, r0);
        subtract_template<HPo4 + HQo4>(r1, rinf);
        add_template<LOo4 * 2>(res + LOo4 * 4, r1);
    }

    // Fallback for operands below the threshold: the quadratic routine.
    template<int PNo4, int QNo4>
    static typename enable_if<!ENABLE_KARATSUBA(PNo4,QNo4)>::type convolute_template(uint32_t *res, const uint32_t *p, const uint32_t *q) {
        return convolute_schoolbook_template<PNo4,QNo4>(res, p, q);
    }

#undef ENABLE_KARATSUBA
};

// Capacity, in elements, of each input array. It is a multiple of 4 because
// main() multiplies exactly MaxN/4 blocks of four coefficients.
enum { MaxN = 100004 };
// Input coefficient arrays and the product; statically zero-initialised.
alignas(16) uint32_t p[MaxN], q[MaxN], res[MaxN*2];

// Reads N, then N pairs (A, B) stored at 1-based positions of p and q,
// multiplies the two fixed-size arrays, and prints product coefficients
// 1 .. 2N, one per line. Returns 1 on malformed input or when N does not fit
// the static arrays, 0 otherwise.
int main() {
    int N;
    if(scanf("%d", &N) != 1)
        return 1;
    // p[N], q[N] and res[2N] are all touched below, so N must stay below MaxN.
    if(N >= MaxN)
        return 1;
    for(int i = 0; i < N; ++ i) {
        int A, B;
        if(scanf("%d%d", &A, &B) != 2)
            return 1;
        p[i + 1] = A, q[i + 1] = B;
    }
    IntOp32::convolute_template<MaxN/4,MaxN/4>(res, p, q);
    for(int i = 1; i <= N + N; ++ i) {
        int ans = res[i];
        printf("%d\n", ans);
    }
    return 0;
}

CODE
    # Make sure the generated source is completely on disk before the
    # compiler reads it.
    close $fh or die "cannot finish writing tmp.cpp: $!";
    # g++ releases before 4.7 reject -std=c++11 and only know the dialect as
    # c++0x, so retry with the old spelling when the first attempt fails.
    for my $std ('c++11', 'c++0x') {
        system "g++ -O2 -lm -mavx -std=$std tmp.cpp -o $exe_name 2> my_compile.log";
        last if $? == 0;
    }
    if($? != 0) {
        # Relay the compiler diagnostics of the last attempt, then abort.
        open(my $log, '<', 'my_compile.log')
            or die "compile error (cannot read my_compile.log: $!)";
        while(<$log>) { print STDERR $_; }
        die 'compile error';
    }
}

Submission Info

Submission Time
Task C - 高速フーリエ変換
User anta
Language Perl (5.14.2)
Score 0
Code Size 9579 Byte
Status RE
Exec Time 34 ms
Memory 1116 KB

Compile Error

cc1plus: error: unrecognized command line option ‘-std=c++11’
compile error at ./Main.pl line 299, <$fh> line 1.
BEGIN failed--compilation aborted at ./Main.pl line 301.

Judge Result

Set Name Sample All
Score / Max Score 0 / 0 0 / 100
Status
RE × 1
RE × 33
Set Name Test Cases
Sample 00_sample_01
All 00_sample_01, 01_00_01, 01_01_19, 01_02_31, 01_03_22, 01_04_31, 01_05_40, 01_06_15, 01_07_39, 01_08_28, 01_09_30, 01_10_23, 01_11_33, 01_12_11, 01_13_28, 01_14_41, 01_15_26, 01_16_49, 01_17_34, 01_18_02, 01_19_33, 01_20_29, 02_00_51254, 02_01_82431, 02_02_17056, 02_03_34866, 02_04_6779, 02_05_65534, 02_06_65535, 02_07_65536, 02_08_65537, 02_09_65538, 02_10_100000
Case Name Status Exec Time Memory
00_sample_01 RE 30 ms 1048 KB
01_00_01 RE 34 ms 1108 KB
01_01_19 RE 33 ms 1108 KB
01_02_31 RE 30 ms 1112 KB
01_03_22 RE 30 ms 1052 KB
01_04_31 RE 29 ms 1092 KB
01_05_40 RE 29 ms 1056 KB
01_06_15 RE 29 ms 1056 KB
01_07_39 RE 30 ms 1092 KB
01_08_28 RE 30 ms 1112 KB
01_09_30 RE 29 ms 1060 KB
01_10_23 RE 29 ms 1104 KB
01_11_33 RE 29 ms 1060 KB
01_12_11 RE 29 ms 1064 KB
01_13_28 RE 29 ms 1068 KB
01_14_41 RE 29 ms 1112 KB
01_15_26 RE 29 ms 1052 KB
01_16_49 RE 28 ms 1116 KB
01_17_34 RE 29 ms 1060 KB
01_18_02 RE 29 ms 1100 KB
01_19_33 RE 29 ms 1112 KB
01_20_29 RE 29 ms 1064 KB
02_00_51254 RE 30 ms 1056 KB
02_01_82431 RE 29 ms 1068 KB
02_02_17056 RE 30 ms 1108 KB
02_03_34866 RE 28 ms 1104 KB
02_04_6779 RE 30 ms 1108 KB
02_05_65534 RE 28 ms 1100 KB
02_06_65535 RE 29 ms 1064 KB
02_07_65536 RE 30 ms 1104 KB
02_08_65537 RE 29 ms 1112 KB
02_09_65538 RE 30 ms 1100 KB
02_10_100000 RE 30 ms 1076 KB