diff 周りの処理

きつねのしっぽで使っている diff まわりを公開してみます。元コードを作成した id:siokoshou さんへのフィードバックと感謝を込めて。どうもありがとうございました!

using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Diagnostics;

// http://d.hatena.ne.jp/siokoshou/20070315#p1
namespace FoxTails.Text {
    public enum EditType: byte {
        Nothing, Add, Delete
    }

    [Serializable]
    public class DiffResult {
        public readonly EditType Type;
        public readonly string Text;
        public readonly int Line;

        public DiffResult(EditType type, int line, string text) {
            this.Type = type;
            this.Line = line;
            this.Text = text;
        }

        public override string ToString() {
            return string.Format("{0}: {1}, {2}", Line, Type, Text);
        }
    }

    public class FastDiff {
        private readonly int[] dataA, dataB;
        private readonly string[] textA, textB;
        private readonly bool isSwap;
        private Snake[] fp;

        private static readonly Regex crlf = new Regex("\r\n|\r|\n", RegexOptions.Compiled);

        private FastDiff(string[] textA, string[] textB) {
            if (textA.Length <= textB.Length) {
                this.isSwap = false;
                this.textA = textA;
                this.textB = textB;
            }
            else {
                this.isSwap = true;
                this.textA = textB;
                this.textB = textA;
            }
            this.dataA = MakeHash(this.textA);
            this.dataB = MakeHash(this.textB);
        }

        /// <summary>複数行の文字列を行単位で比較します</summary>
        /// <param name="textA">元テキスト</param>
        /// <param name="textB">変更テキスト</param>
        /// <returns>比較結果</returns>
        public static DiffResult[] Diff(string textA, string textB) {
            if (textA == null || textB == null) {
                throw new ArgumentNullException();
            }
            if (textA == textB) {
                return new DiffResult[0];
            }
            if (textA == string.Empty) {
                return CreateSimpleResult(EditType.Add, textB);
            }
            if (textB == string.Empty) {
                return CreateSimpleResult(EditType.Delete, textA);
            }
            return new FastDiff(SplitLine(textA), SplitLine(textB)).DetectDiff();
        }

        private static DiffResult[] CreateSimpleResult(EditType type, string text) {
            string[] line = SplitLine(text);
            DiffResult[] result = new DiffResult[line.Length];
            for (int i = 0; i < line.Length; i++) {
                result[i] = new DiffResult(type, i, line[i]);
            }
            return result;
        }

        private static string[] SplitLine(string text) {
            return crlf.Split(text);
        }

        private static int[] MakeHash(string[] text) {
            List<int> list = new List<int>();
            foreach (string line in text) {
                list.Add(MakeHash(line));
            }
            return list.ToArray();
        }

        private static int MakeHash(string line) {
            const int seed = 5381;
            int hash = seed;
            foreach (char c in line) {
                hash = ((hash << 5) ^ (hash >> 27)) ^ c;
            }
            return hash;
        }

        private DiffResult[] DetectDiff() {
            Debug.Assert(this.dataA.Length <= this.dataB.Length);

            int DELTA = dataB.Length - dataA.Length;
            this.fp = new Snake[dataA.Length + dataB.Length + 3];

            int p = 0;
            do {
                for (int k = -p; k < DELTA; k++) {
                    SearchSnake(k);
                }
                for (int k = DELTA + p; k >= DELTA; k--) {
                    SearchSnake(k);
                }
                p++;
            }
            while (this.fp[dataB.Length + 1].posB != (dataB.Length + 1));

            // 末尾検出用のCS
            CommonSubsequence endCS = new CommonSubsequence(this.dataA.Length, this.dataB.Length, 0, this.fp[this.dataB.Length + 1].CS);
            CommonSubsequence result = CommonSubsequence.Reverse(endCS);

            if (this.isSwap)
                return PresentDiffSwap(result);
            else
                return PresentDiff(result);
        }

        private void SearchSnake(int k) {
            int kk = this.dataA.Length + 1 + k;
            CommonSubsequence previousCS = null;
            int posA = 0, posB = 0;

            int lk = kk - 1;
            int rk = kk + 1;

            // 論文のfp[n]は-1始まりだが、0始まりのほうが初期化の都合がよいため、
            // +1のゲタを履かせる。fpから読む際は-1し、書く際は+1する。
            int lb = this.fp[lk].posB;
            int rb = this.fp[rk].posB - 1;

            if (lb > rb) {
                posB = lb;
                previousCS = this.fp[lk].CS;
            }
            else {
                posB = rb;
                previousCS = this.fp[rk].CS;
            }
            posA = posB - k;

            int startA = posA;
            int startB = posB;

            while ((posA < dataA.Length) && (posB < dataB.Length)
                && (dataA[posA] == dataB[posB]) && textA[posA] == textB[posB]) {
                posA++;
                posB++;
            }

            if (startA != posA) {
                this.fp[kk].CS = new CommonSubsequence(startA, startB, posA - startA, previousCS);
            }
            else {
                this.fp[kk].CS = previousCS;
            }
            this.fp[kk].posB = posB + 1; // fpへ+1して書く。論文のfpに+1のゲタを履かせる。
        }

        private DiffResult[] PresentDiff(CommonSubsequence cs) {
            List<DiffResult> list = new List<DiffResult>();
            int originalStart = 0, modifiedStart = 0;

            while (true) {
                if (originalStart < cs.StartA || modifiedStart < cs.StartB) {
                    for (int line = originalStart; line < cs.StartA; line++) {
                        list.Add(new DiffResult(EditType.Delete, line, this.textA[line]));
                    }
                    for (int line = modifiedStart; line < cs.StartB; line++) {
                        list.Add(new DiffResult(EditType.Add, line, this.textB[line]));
                    }
                }

                // 末尾検出
                if (cs.Length == 0) break;

                originalStart = cs.StartA + cs.Length;
                modifiedStart = cs.StartB + cs.Length;

                cs = cs.Next;
            }
            return list.ToArray();
        }

        private DiffResult[] PresentDiffSwap(CommonSubsequence cs) {
            List<DiffResult> list = new List<DiffResult>();
            int originalStart = 0, modifiedStart = 0;

            while (true) {
                if (originalStart < cs.StartB || modifiedStart < cs.StartA) {
                    for (int line = originalStart; line < cs.StartB; line++) {
                        list.Add(new DiffResult(EditType.Delete, line, this.textB[line]));
                    }
                    for (int line = modifiedStart; line < cs.StartA; line++) {
                        list.Add(new DiffResult(EditType.Add, line, this.textA[line]));
                    }
                }

                // 末尾検出
                if (cs.Length == 0) break;

                originalStart = cs.StartB + cs.Length;
                modifiedStart = cs.StartA + cs.Length;
                cs = cs.Next;
            }
            return list.ToArray();
        }

        private struct Snake {
            public int posB;
            public CommonSubsequence CS;

            public override string ToString() {
                return "posB:" + this.posB + ", CS:" + ((this.CS == null) ? "null" : "exist");
            }
        }

        private class CommonSubsequence {
            private int startA_, startB_;
            private int length_;
            public CommonSubsequence Next;

            public int StartA { get { return this.startA_; } }
            public int StartB { get { return this.startB_; } }
            public int Length { get { return this.length_; } }

            public CommonSubsequence() { }

            public CommonSubsequence(int startA, int startB, int length, CommonSubsequence next) {
                this.startA_ = startA;
                this.startB_ = startB;
                this.length_ = length;
                this.Next = next;
            }

            public static CommonSubsequence Reverse(CommonSubsequence old) {
                CommonSubsequence newTop = null;
                while (old != null) {
                    CommonSubsequence next = old.Next;
                    old.Next = newTop;
                    newTop = old;
                    old = next;
                }
                return newTop;
            }
        }
    }
}

オリジナルには幾つか diff オプション付けられる様になっていましたが、きつねのしっぽでは不要だったのでぜんぶ外してしまいました。その他、オリジナルでは「変更箇所」を出力する形式だったのを、こちらでは「削除行」「追加行」に分けて出力するようになっています。
ロックマン3のスネークマンが使ってくる技名っぽい関数の中は、ほぼブラックボックス状態でした。ここには手を付けられませぬ(´・ω・`)