Submission #11593331


Source Code Expand

#include <bits/stdc++.h>
using namespace std;
void solve();
int main() { solve(); }
constexpr uint32_t MOD = 998244353;

#ifdef NyaanDebug
#define long long long
#endif

/**
 * @brief 高速入出力
 * @author えびちゃん
 * @see https://qiita.com/rsk0315_h4x/items/17a9cb12e0de5fd918f4
 */
namespace fast {
static constexpr size_t buf_size = 1 << 17;
static constexpr size_t margin = 1;
static char inbuf[buf_size + margin] = {};
static __attribute__((aligned(8))) char outbuf[buf_size + margin] = {};
static __attribute__((aligned(8))) char minibuf[32];
static constexpr size_t int_digits = 20;  // 18446744073709551615
static constexpr uintmax_t digit_mask = 0x3030303030303030;
static constexpr uintmax_t first_mask = 0x00FF00FF00FF00FF;
static constexpr uintmax_t second_mask = 0x0000FFFF0000FFFF;
static constexpr uintmax_t third_mask = 0x00000000FFFFFFFF;
static constexpr uintmax_t tenpow[] = {
    1,
    10,
    100,
    1000,
    10000,
    100000,
    1000000,
    10000000,
    100000000,
    1000000000,
    10000000000,
    100000000000,
    1000000000000,
    10000000000000,
    100000000000000,
    1000000000000000,
    10000000000000000,
    100000000000000000,
    1000000000000000000,
    10000000000000000000u,
};
static __attribute__((
    aligned(8))) char inttab[40000] = {};  // 4-digit integers (10000 many)
static char S_sep = ' ', S_end = '\n';
template <typename Tp>
using enable_if_integral = std::enable_if<std::is_integral<Tp>::value, Tp>;

class scanner {
  char *pos = inbuf;
  char *endpos = inbuf + buf_size;

  void M_read_from_stdin() { endpos = inbuf + fread(pos, 1, buf_size, stdin); }
  void M_reread_from_stdin() {
    ptrdiff_t len = endpos - pos;
    if (!(inbuf + len <= pos)) return;
    memcpy(inbuf, pos, len);
    char *tmp = inbuf + len;
    endpos = tmp + fread(tmp, 1, buf_size - len, stdin);
    *endpos = 0;
    pos = inbuf;
  }

 public:
  scanner() { M_read_from_stdin(); }

  template <typename Integral,
            typename enable_if_integral<Integral>::type * = nullptr>
  void scan_parallel(Integral &x) {
    if (__builtin_expect(endpos <= pos + int_digits, 0)) M_reread_from_stdin();
    bool ends = false;
    typename std::make_unsigned<Integral>::type y = 0;
    bool neg = false;
    if (std::is_signed<Integral>::value && *pos == '-') {
      neg = true;
      ++pos;
    }
    do {
      memcpy(minibuf, pos, 8);
      long c = *(long *)minibuf;
      long d = (c & digit_mask) ^ digit_mask;
      int skip = 8;
      int shift = 8;
      if (d) {
        int ctz = __builtin_ctzl(d);
        if (ctz == 4) break;
        c &= (1L << (ctz - 5)) - 1;
        int discarded = (68 - ctz) / 8;
        shift -= discarded;
        c <<= discarded * 8;
        skip -= discarded;
        ends = true;
      }
      c |= digit_mask;
      c ^= digit_mask;
      c = ((c >> 8) + c * 10) & first_mask;
      c = ((c >> 16) + c * 100) & second_mask;
      c = ((c >> 32) + c * 10000) & third_mask;
      y = y * tenpow[shift] + c;
      pos += skip;
    } while (!ends);
    x = (neg ? -y : y);
    ++pos;
  }

  template <typename Integral,
            typename enable_if_integral<Integral>::type * = nullptr>
  void scan_serial(Integral &x) {
    if (__builtin_expect(endpos <= pos + int_digits, 0)) M_reread_from_stdin();
    bool neg = false;
    if (std::is_signed<Integral>::value && *pos == '-') {
      neg = true;
      ++pos;
    }
    typename std::make_unsigned<Integral>::type y = *pos - '0';
    while (*++pos >= '0') y = 10 * y + *pos - '0';
    x = (neg ? -y : y);
    ++pos;
  }

  template <typename Integral,
            typename enable_if_integral<Integral>::type * = nullptr>
  // Use scan_parallel(x) only when x may be too large (about 10^12).
  // Otherwise, even when x <= 10^9, scan_serial(x) may be faster.
  void scan(Integral &x) {
    scan_parallel(x);
  }

  void scan_serial(std::string &s) {
    // until first whitespace
    s = "";
    do {
      char *startpos = pos;
      while (*pos > ' ') ++pos;
      s += std::string(startpos, pos);
      if (*pos != 0) {
        ++pos;  // skip the space
        break;
      }
      M_reread_from_stdin();
    } while (true);
  }

  void scan(std::string &s) { scan_serial(s); }

  template <typename Tp, typename... Args>
  void scan(Tp &x, Args &&... xs) {
    scan(x);
    scan(std::forward<Args>(xs)...);
  }
};

class printer {
  char *pos = outbuf;

  void M_flush_stdout() {
    fwrite(outbuf, 1, pos - outbuf, stdout);
    pos = outbuf;
  }

  static int S_int_digits(uintmax_t n) {
    if (n < tenpow[16]) {                 // 1
      if (n < tenpow[8]) {                // 2
        if (n < tenpow[4]) {              // 3
          if (n < tenpow[2]) {            // 4
            if (n < tenpow[1]) return 1;  // 5
            return 2;                     // 5
          }
          if (n < tenpow[3]) return 3;  // 4
          return 4;                     // 4
        }
        if (n < tenpow[6]) {            // 4
          if (n < tenpow[5]) return 5;  // 5
          return 6;                     // 5
        }
        if (n < tenpow[7]) return 7;  // 5
        return 8;                     // 5
      }
      if (n < tenpow[12]) {             // 3
        if (n < tenpow[10]) {           // 4
          if (n < tenpow[9]) return 9;  // 5
          return 10;                    // 5
        }
        if (n < tenpow[11]) return 11;  // 5
        return 12;                      // 5
      }
      if (n < tenpow[14]) {             // 4
        if (n < tenpow[13]) return 13;  // 5
        return 14;                      // 5
      }
      if (n < tenpow[15]) return 15;  // 5
      return 16;                      // 5
    }
    if (n < tenpow[18]) {             // 2
      if (n < tenpow[17]) return 17;  // 3
      return 18;                      // 3
    }
    return 19;  // 2
    // if (n < tenpow[19]) return 19;  // 3
    // return 20;  // 3
  }

  void M_precompute() {
    unsigned long const digit1 = 0x0200000002000000;
    unsigned long const digit2 = 0xf600fffff6010000;
    unsigned long const digit3 = 0xfff600fffff60100;
    unsigned long const digit4 = 0xfffff600fffff601;
    unsigned long counter = 0x3130303030303030;
    for (int i = 0, i4 = 0; i4 < 10; ++i4, counter += digit4)
      for (int i3 = 0; i3 < 10; ++i3, counter += digit3)
        for (int i2 = 0; i2 < 10; ++i2, counter += digit2)
          for (int i1 = 0; i1 < 5; ++i1, ++i, counter += digit1)
            *((unsigned long *)inttab + i) = counter;
  }

 public:
  printer() { M_precompute(); }
  ~printer() { M_flush_stdout(); }

  void print(char c) {
    if (__builtin_expect(pos + 1 >= outbuf + buf_size, 0)) M_flush_stdout();
    *pos++ = c;
  }

  template <size_t N>
  void print(char const (&s)[N]) {
    if (__builtin_expect(pos + N >= outbuf + buf_size, 0)) M_flush_stdout();
    memcpy(pos, s, N - 1);
    pos += N - 1;
  }

  void print(char const *s) {
    // FIXME: strlen や memcpy などで定数倍高速化したい
    while (*s != 0) {
      *pos++ = *s++;
      if (pos == outbuf + buf_size) M_flush_stdout();
    }
  }

  void print(std::string const &s) { print(s.data()); }

  template <typename Integral,
            typename enable_if_integral<Integral>::type * = nullptr>
  void print(Integral x) {
    if (__builtin_expect(pos + int_digits >= outbuf + buf_size, 0))
      M_flush_stdout();
    if (x == 0) {
      *pos++ = '0';
      return;
    }
    if (x < 0) {
      *pos++ = '-';
      if (__builtin_expect(x == std::numeric_limits<Integral>::min(), 0)) {
        switch (sizeof x) {
          case 2:
            print("32768");
            return;
          case 4:
            print("2147483648");
            return;
          case 8:
            print("9223372036854775808");
            return;
        }
      }
      x = -x;
    }

    typename std::make_unsigned<Integral>::type y = x;
    int len = S_int_digits(y);
    pos += len;
    char *tmp = pos;
    int w = (pos - outbuf) & 3;
    if (w > len) w = len;
    for (int i = w; i > 0; --i) {
      *--tmp = y % 10 + '0';
      y /= 10;
    }
    len -= w;
    while (len >= 4) {
      tmp -= 4;
      *(unsigned *)tmp = *((unsigned *)inttab + (y % 10000));
      len -= 4;
      if (len) y /= 10000;
    }
    while (len-- > 0) {
      *--tmp = y % 10 + '0';
      y /= 10;
    }
  }

  template <typename Tp, typename... Args>
  void print(Tp const &x, Args &&... xs) {
    if (sizeof...(Args) > 0) {
      print(x);
      print(S_sep);
      print(std::forward<Args>(xs)...);
    }
  }

  template <typename Tp>
  void println(Tp const &x) {
    print(x), print(S_end);
  }

  template <typename Tp, typename... Args>
  void println(Tp const &x, Args &&... xs) {
    print(x, std::forward<Args>(xs)...);
    print(S_end);
  }

  static void set_sep(char c) { S_sep = c; }
  static void set_end(char c) { S_end = c; }
};
}  // namespace fast

fast::scanner fastin;
fast::printer fastout;

template< int mod >
struct ModInt {
  int x;

  ModInt() : x(0) {}

  ModInt(int64_t y) : x(y >= 0 ? y % mod : (mod - (-y) % mod) % mod) {}

  ModInt &operator+=(const ModInt &p) {
    if((x += p.x) >= mod) x -= mod;
    return *this;
  }

  ModInt &operator-=(const ModInt &p) {
    if((x += mod - p.x) >= mod) x -= mod;
    return *this;
  }

  ModInt &operator*=(const ModInt &p) {
    x = (int) (1LL * x * p.x % mod);
    return *this;
  }

  ModInt &operator/=(const ModInt &p) {
    *this *= p.inverse();
    return *this;
  }

  ModInt operator-() const { return ModInt(-x); }

  ModInt operator+(const ModInt &p) const { return ModInt(*this) += p; }

  ModInt operator-(const ModInt &p) const { return ModInt(*this) -= p; }

  ModInt operator*(const ModInt &p) const { return ModInt(*this) *= p; }

  ModInt operator/(const ModInt &p) const { return ModInt(*this) /= p; }

  bool operator==(const ModInt &p) const { return x == p.x; }

  bool operator!=(const ModInt &p) const { return x != p.x; }

  ModInt inverse() const {
    int a = x, b = mod, u = 1, v = 0, t;
    while(b > 0) {
      t = a / b;
      swap(a -= t * b, b);
      swap(u -= t * v, v);
    }
    return ModInt(u);
  }

  ModInt pow(int64_t n) const {
    ModInt ret(1), mul(x);
    while(n > 0) {
      if(n & 1) ret *= mul;
      mul *= mul;
      n >>= 1;
    }
    return ret;
  }

  friend ostream &operator<<(ostream &os, const ModInt &p) {
    return os << p.x;
  }

  friend istream &operator>>(istream &is, ModInt &a) {
    int64_t t;
    is >> t;
    a = ModInt< mod >(t);
    return (is);
  }

  static constexpr int get_mod() { return mod; }
};

static constexpr uint32_t get_pr(uint32_t mod) {
  using u64 = uint64_t;
  u64 ds[32] = {};
  int idx = 0;
  u64 m = mod - 1;
  for (u64 i = 2; i * i <= m; ++i) {
    if (m % i == 0) {
      ds[idx++] = i;
      while (m % i == 0) m /= i;
    }
  }
  if (m != 1) ds[idx++] = m;

  uint32_t pr = 2;
  while (1) {
    int flg = 1;
    for (int i = 0; i < idx; i++) {
      u64 a = pr, b = (mod - 1) / ds[i], r = 1;
      while (b) {
        if (b & 1) r = r * a % mod;
        a = a * a % mod;
        b >>= 1;
      }
      if (r == 1) {
        flg = 0;
        break;
      }
    }
    if (flg == 1) break;
    ++pr;
  }
  return pr;
};

template <typename mint>
struct NTT {
  static constexpr uint32_t mod = mint::get_mod();
  static constexpr uint32_t pr = get_pr(mod);
  vector<mint> w, y;

  void setwy(int M) {
    if ((int)w.size() >= (M >> 1)) return;
    w.resize(M >> 1);
    y.resize(M >> 1);
    mint z = mint(pr).pow((mod - 1) / M);
    mint x = z.inverse();
    int j = M >> 1;
    while (j >>= 1) {
      w[j] = z, y[j] = x;
      z *= z, x *= x;
    }
    y[0] = w[0] = mint(1);
    j = M >> 1;
    for (int js = 2; js < j; js <<= 1) {
      z = w[js], x = y[js];
      for (int k = js + 1, k2 = 1; k2 < js; ++k, ++k2) {
        w[k] = w[k2] * z, y[k] = y[k2] * x;
      }
    }
  }

  void fft4(vector<mint> &a, int k) {
    if (k & 1) {
      int v = 1 << (k - 1);
      for (int j = 0; j < v; ++j) {
        mint ajv = a[j + v];
        a[j + v] = a[j] - ajv;
        a[j] += ajv;
      }
    }
    int u = 1 << (k & 1);
    int v = 1 << (k - 2 - (k & 1));
    while (v) {
      // jh = 0
      {
        int j0 = 0;
        int j1 = v;
        int j2 = j1 + v;
        int j3 = j2 + v;
        for (; j0 < v; ++j0, ++j1, ++j2, ++j3) {
          mint t0 = a[j0], t1 = a[j1], t2 = a[j2], t3 = a[j3];
          mint t0p2 = t0 + t2, t1p3 = t1 + t3;
          mint t0m2 = t0 - t2, t1m3 = (t1 - t3) * w[1];
          a[j0] = t0p2 + t1p3, a[j1] = t0p2 - t1p3;
          a[j2] = t0m2 + t1m3, a[j3] = t0m2 - t1m3;
        }
      }
      // jh >= 1
      for (int jh = 1; jh < u; ++jh) {
        int j0 = jh * v * 4;
        int j1 = j0 + v;
        int j2 = j1 + v;
        int j3 = j2 + v;
        int je = j1;
        mint ww = w[jh], xx = w[jh << 1];
        mint wx = ww * xx;
        for ( ; j0 < je; ++j0, ++j1, ++j2, ++j3){
          mint t0 = a[j0], t1 = a[j1] * xx , t2 = a[j2] * ww, t3 = a[j3] * wx;
          mint t0p2 = t0 + t2, t1p3 = t1 + t3;
          mint t0m2 = t0 - t2, t1m3 = (t1 - t3) * w[1];
          a[j0] = t0p2 + t1p3 , a[j1] = t0p2 - t1p3;
          a[j2] = t0m2 + t1m3 , a[j3] = t0m2 - t1m3; 
        }
      }
      u <<= 2;
      v >>= 2;
    }
  }

  /*
  void fft4_parallel(vector<mint> &a, vector<mint> &b, int k) {
    if (k & 1) {
      int v = 1 << (k - 1);
      for (int j = 0; j < v; ++j) {
        mint ajv = a[j + v] , bjv = b[j + v];
        a[j + v] = a[j] - ajv, b[j + v] = b[j] - bjv;
        a[j] += ajv , b[j] += bjv; 
      }
    }
    int u = 1 << (k & 1);
    int v = 1 << (k - 2 - (k & 1));
    while (v) {
      // jh = 0
      {
        int j0 = 0;
        int j1 = v;
        int j2 = j1 + v;
        int j3 = j2 + v;
        for (; j0 < v; ++j0, ++j1, ++j2, ++j3) {
          mint t0 = a[j0], t1 = a[j1], t2 = a[j2], t3 = a[j3];
          mint t0p2 = t0 + t2, t1p3 = t1 + t3;
          mint t0m2 = t0 - t2, t1m3 = (t1 - t3) * w[1];
          a[j0] = t0p2 + t1p3, a[j1] = t0p2 - t1p3;
          a[j2] = t0m2 + t1m3, a[j3] = t0m2 - t1m3;
          
          mint u0 = b[j0], u1 = b[j1], u2 = b[j2], u3 = b[j3];
          mint u0p2 = u0 + u2, u1p3 = u1 + u3;
          mint u0m2 = u0 - u2, u1m3 = (u1 - u3) * w[1];
          b[j0] = u0p2 + u1p3, b[j1] = u0p2 - u1p3;
          b[j2] = u0m2 + u1m3, b[j3] = u0m2 - u1m3;
        }
      }
      // jh >= 1
      for (int jh = 1; jh < u; ++jh) {
        int j0 = jh * v * 4;
        int j1 = j0 + v;
        int j2 = j1 + v;
        int j3 = j2 + v;
        int je = j1;
        mint ww = w[jh], xx = w[jh << 1];
        mint wx = ww * xx;
        for ( ; j0 < je; ++j0, ++j1, ++j2, ++j3){
          mint t0 = a[j0], t1 = a[j1] * xx , t2 = a[j2] * ww, t3 = a[j3] * wx;
          mint t0p2 = t0 + t2, t1p3 = t1 + t3;
          mint t0m2 = t0 - t2, t1m3 = (t1 - t3) * w[1];
          a[j0] = t0p2 + t1p3 , a[j1] = t0p2 - t1p3;
          a[j2] = t0m2 + t1m3 , a[j3] = t0m2 - t1m3; 
          
          mint u0 = b[j0], u1 = b[j1] * xx , u2 = b[j2] * ww, u3 = b[j3] * wx;
          mint u0p2 = u0 + u2, u1p3 = u1 + u3;
          mint u0m2 = u0 - u2, u1m3 = (u1 - u3) * w[1];
          b[j0] = u0p2 + u1p3 , b[j1] = u0p2 - u1p3;
          b[j2] = u0m2 + u1m3 , b[j3] = u0m2 - u1m3; 
        }
      }
      u <<= 2;
      v >>= 2;
    }
  }
  */

  void ifft4(vector<mint> &a, int k) {
    int u = 1 << (k - 2);
    int v = 1;
    while (u) {
      // jh = 0
      {
        int j0 = 0;
        int j2 = v + v;
        for (; j0 < v; ++j0, ++j2) {
          mint t0 = a[j0], t1 = a[j0 + v], t2 = a[j2], t3 = a[j2 + v];
          mint t0p1 = t0 + t1, t2p3 = t2 + t3;
          mint t0m1 = t0 - t1, t2m3 = (t2 - t3) * y[1];
          a[j0] = t0p1 + t2p3, a[j2] = t0p1 - t2p3;
          a[j0 + v] = t0m1 + t2m3, a[j2 + v] = t0m1 - t2m3;
        }
      }
      // jh >= 1
      for (int jh = 1; jh < u; ++jh) {
        int j0 = (jh * v) << 2;
        int je = j0 + v;
        int j2 = je + v;
        mint ww = y[jh];
        mint xx = y[jh << 1];
        mint yy = y[(jh << 1) + 1];
        for (; j0 < je; ++j0, ++j2) {
          mint t1 = a[j0 + v], t3 = a[j2 + v];
          mint t0p1 = a[j0] + t1, t2p3 = a[j2] + t3;
          mint t0m1 = (a[j0] - t1) * xx, t2m3 = (a[j2] - t3) * yy;
          a[j0] = t0p1 + t2p3, a[j2] = (t0p1 - t2p3) * ww;
          a[j0 + v] = t0m1 + t2m3, a[j2 + v] = (t0m1 - t2m3) * ww;
        }
      }
      u >>= 2;
      v <<= 2;
    }
    if (k & 1) {
      u = 1 << (k - 1);
      for (int j = 0; j < u; j++) {
        mint ajv = a[j] - a[j + u];
        a[j] += a[j + u];
        a[j + u] = ajv;
      }
    }
  }

  vector<mint> multiply(vector<mint> s, vector<mint> t) {
    int l = s.size() + t.size() - 1;
    int k = 2, M = 4;
    while (M < l) M <<= 1, ++k;
    setwy(M);
    s.resize(M);
    t.resize(M);
    fft4(s, k);
    fft4(t, k);
    for (int i = 0; i < M; ++i) s[i] *= t[i];
    ifft4(s, k);
    s.resize(l);
    mint invm = mint(M).inverse();
    for (auto &x : s) x *= invm;
    return s;
  }
};


void solve() {
  using mint =ModInt<MOD>;
  int N, x, y;
  fastin.scan_serial(N);
  vector<mint> a(N), b(N);
  for (int i = 0; i < N; i++) {
    fastin.scan(x, y);
    a[i].x =x;
    b[i].x =y;
  } 
  NTT<mint> ntt;
  auto c = ntt.multiply(a, b);
  fastout.println(0);
  int l = c.size();
  for (int i = 0; i < l; ++i) {
    fastout.println(c[i].x);
  }
}

Submission Info

Submission Time
Task C - 高速フーリエ変換
User Nyaan
Language C++14 (GCC 5.4.1)
Score 100
Code Size 18039 Byte
Status AC
Exec Time 34 ms
Memory 5228 KB

Judge Result

Set Name Sample All
Score / Max Score 0 / 0 100 / 100
Status
AC × 1
AC × 33
Set Name Test Cases
Sample 00_sample_01
All 00_sample_01, 01_00_01, 01_01_19, 01_02_31, 01_03_22, 01_04_31, 01_05_40, 01_06_15, 01_07_39, 01_08_28, 01_09_30, 01_10_23, 01_11_33, 01_12_11, 01_13_28, 01_14_41, 01_15_26, 01_16_49, 01_17_34, 01_18_02, 01_19_33, 01_20_29, 02_00_51254, 02_01_82431, 02_02_17056, 02_03_34866, 02_04_6779, 02_05_65534, 02_06_65535, 02_07_65536, 02_08_65537, 02_09_65538, 02_10_100000
Case Name Status Exec Time Memory
00_sample_01 AC 1 ms 256 KB
01_00_01 AC 1 ms 256 KB
01_01_19 AC 1 ms 256 KB
01_02_31 AC 1 ms 256 KB
01_03_22 AC 1 ms 256 KB
01_04_31 AC 1 ms 256 KB
01_05_40 AC 1 ms 256 KB
01_06_15 AC 1 ms 256 KB
01_07_39 AC 1 ms 256 KB
01_08_28 AC 1 ms 256 KB
01_09_30 AC 1 ms 256 KB
01_10_23 AC 1 ms 256 KB
01_11_33 AC 1 ms 256 KB
01_12_11 AC 1 ms 256 KB
01_13_28 AC 1 ms 256 KB
01_14_41 AC 1 ms 256 KB
01_15_26 AC 1 ms 256 KB
01_16_49 AC 1 ms 256 KB
01_17_34 AC 1 ms 256 KB
01_18_02 AC 1 ms 256 KB
01_19_33 AC 1 ms 256 KB
01_20_29 AC 1 ms 256 KB
02_00_51254 AC 17 ms 2916 KB
02_01_82431 AC 33 ms 4596 KB
02_02_17056 AC 8 ms 1532 KB
02_03_34866 AC 16 ms 2420 KB
02_04_6779 AC 3 ms 768 KB
02_05_65534 AC 18 ms 3188 KB
02_06_65535 AC 18 ms 3188 KB
02_07_65536 AC 18 ms 3188 KB
02_08_65537 AC 32 ms 4220 KB
02_09_65538 AC 32 ms 4220 KB
02_10_100000 AC 34 ms 5228 KB