Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CodeForces - 825F String Compression #42

Open
Wizmann opened this issue Jul 20, 2020 · 0 comments
Open

CodeForces - 825F String Compression #42

Wizmann opened this issue Jul 20, 2020 · 0 comments

Comments

@Wizmann
Copy link
Owner

Wizmann commented Jul 20, 2020

Problem

Description

我们可以把一个字符串S表示为:s_1 a_1 s_2 a_2 ... s_k a_ks_i代表一个子串,a_i是一个整数,代表这个子串重复的次数。

例如"aa"可以表示为"a2",也可以表示为"a1a1"。"bb...b"(10个b)可以表示为"b10"。

S的长度最大为8000。问使用这种方法,能表示字符串S的最小长度。

Solution

现在O(n^2)的算法,n=8000都能过了?真是活久见啊。

这题的思路非常简单。dp[i]代表前i个字符的最小表示长度。compress(S[i...j])代表子串的最小表示长度,这个可以使用KMP算法很容易的实现。

所以DP公式可以写做: dp[j] = min(dp[j], dp[i] + compress(S[i + 1 ... j])

BUT,有了DP公式并不是本题的全部,我们要看到除了DP公式之外的部分才能提高解题的正确率。

对于compress(S[i...j])函数,我们既要考虑到子串有循环节的情况(KMP经典应用之一),又不能忽视子串没有循环节的情况(如"abc")。

本题的一个特殊之处是,构造数据比较困难。所以我们要从理论上多进行打磨,避免卡题。对于构造数据比较容易的题目,可以双管齐下。不过理论上的证明总比写对拍程序要省时间。。。

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <algorithm>
#include <string>
#include <vector>
#include <cassert>

using namespace std;

#define print(x) cout << x << endl
#define input(x) cin >> x

const int INF = 0x3f3f3f3f;

char buffer[8888];

vector<int> kmp_next(const char* s, int n) {
    vector<int> res(n + 1, -1);

    for (int pre = -1, suf = 0; pre < n && suf < n; /* pass */) {
        if (pre == -1 || s[pre] == s[suf]) {
            suf++;
            pre++;
            res[suf] = pre;
        } else {
            pre = res[pre];
        }
    }
    return res;
}

int intlen(int x) {
    if (x == 0) {
        assert(false);
    }
    int len = 0;
    while (x) {
        len++;
        x /= 10;
    }
    return len;
}

int compress(const vector<int>& kmpnext, int l, int r) {
    int m = r - l + 1;
    int res = m + intlen(1);
    
    if (m % (m - kmpnext[r]) == 0) {
        int loop = m - kmpnext[r];
        res = min(res, intlen(m / loop) + loop);
    }
    return res;
}

int solve(const string& s) {
    int n = s.size();
    vector<int> dp(n + 1, INF);

    dp[0] = 0;
    dp[n] = n + 1;

    for (int i = 0; i < n; i++) {
        int m = n - i;
        vector<int> next = kmp_next(s.data() + i, m);
        dp[i] = min(dp[i], i + 1);
        for (int j = 1; j <= m; j++) {
            assert(next[j] >= 0);
            dp[i + j] = min(dp[i + j], dp[i] + compress(next, 1, j));
        }
    }

    /*
    for (int i = 0; i <= n; i++) {
        printf("%d ", dp[i]);
    }
    puts("");
    */

    return dp[n];
}

void test() {
    assert(solve("xxxcac") == 6);
    assert(solve("ababcaababcbac") == 14);
    assert(solve("aaaabbccdabababa") == 12);
    assert(solve("bbbacacb") == 7);
    assert(solve("abaabacccccccc") == 6);
    assert(solve("abaaba") == 4);
    assert(solve("a") == 2);
    assert(solve("abccd") == 6);
    assert(solve("abcc") == 5);
    assert(solve("abbcc") == 6);
    assert(solve("abcab") == 6);
    assert(solve("aaaaaaaaaa") == 3);
    assert(solve("cczabababab") == 7);
}

int main() {
    test();

    scanf("%s", buffer);
    print(solve(string(buffer)));
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

1 participant