diff --git a/c#/crawler/src/Db/CrawlerDbContext.cs b/c#/crawler/src/Db/CrawlerDbContext.cs index 4942f0ce..41d2aac1 100644 --- a/c#/crawler/src/Db/CrawlerDbContext.cs +++ b/c#/crawler/src/Db/CrawlerDbContext.cs @@ -69,12 +69,10 @@ protected override void OnModelCreating(ModelBuilder b) b.Entity().ToTable("tbmc_user"); b.Entity().ToTable($"tbmc_f{Fid}_thread"); b.Entity().ToTable("tbmc_thread_missingFirstReply"); - b.Entity().ToTable($"tbmc_f{Fid}_reply") - .HasOne(e => e.Content).WithOne().HasForeignKey(e => e.Pid); + b.Entity().ToTable($"tbmc_f{Fid}_reply"); b.Entity().ToTable($"tbmc_f{Fid}_reply_content"); b.Entity().ToTable("tbmc_reply_signature").HasKey(e => new {e.SignatureId, e.XxHash3}); - b.Entity().ToTable($"tbmc_f{Fid}_subReply") - .HasOne(e => e.Content).WithOne().HasForeignKey(e => e.Spid); + b.Entity().ToTable($"tbmc_f{Fid}_subReply"); b.Entity().ToTable($"tbmc_f{Fid}_subReply_content"); _ = new RevisionWithSplitting diff --git a/c#/crawler/src/Db/Post/PostWithContentAndAuthorExpGrade.cs b/c#/crawler/src/Db/Post/PostWithContentAndAuthorExpGrade.cs index b3affdd5..769ff9fa 100644 --- a/c#/crawler/src/Db/Post/PostWithContentAndAuthorExpGrade.cs +++ b/c#/crawler/src/Db/Post/PostWithContentAndAuthorExpGrade.cs @@ -1,10 +1,9 @@ // ReSharper disable PropertyCanBeMadeInitOnly.Global namespace tbm.Crawler.Db.Post; -public abstract class PostWithContentAndAuthorExpGrade : PostWithAuthorExpGrade - where TPostContent : BasePostContent +public abstract class PostWithContentAndAuthorExpGrade : PostWithAuthorExpGrade { - public required TPostContent Content { get; set; } + [NotMapped] public byte[]? Content { get; set; } [JsonConverter(typeof(ProtoBufRepeatedFieldJsonConverter))] [NotMapped] diff --git a/c#/crawler/src/Db/Post/ReplyPost.cs b/c#/crawler/src/Db/Post/ReplyPost.cs index 0bd86c75..5f2fa4b4 100644 --- a/c#/crawler/src/Db/Post/ReplyPost.cs +++ b/c#/crawler/src/Db/Post/ReplyPost.cs @@ -1,7 +1,7 @@ // ReSharper disable PropertyCanBeMadeInitOnly.Global namespace tbm.Crawler.Db.Post; -public class ReplyPost : PostWithContentAndAuthorExpGrade +public class ReplyPost : PostWithContentAndAuthorExpGrade { [Key] [Column(TypeName = "bigint")] public ulong Pid { get; set; } diff --git a/c#/crawler/src/Db/Post/SubReplyPost.cs b/c#/crawler/src/Db/Post/SubReplyPost.cs index c7ff4abd..30ba0a8e 100644 --- a/c#/crawler/src/Db/Post/SubReplyPost.cs +++ b/c#/crawler/src/Db/Post/SubReplyPost.cs @@ -1,7 +1,7 @@ // ReSharper disable PropertyCanBeMadeInitOnly.Global namespace tbm.Crawler.Db.Post; -public class SubReplyPost : PostWithContentAndAuthorExpGrade +public class SubReplyPost : PostWithContentAndAuthorExpGrade { [Column(TypeName = "bigint")] public ulong Pid { get; set; } diff --git a/c#/crawler/src/Tieba/Crawl/Parser/Post/ReplyParser.cs b/c#/crawler/src/Tieba/Crawl/Parser/Post/ReplyParser.cs index 10956655..2069b686 100644 --- a/c#/crawler/src/Tieba/Crawl/Parser/Post/ReplyParser.cs +++ b/c#/crawler/src/Tieba/Crawl/Parser/Post/ReplyParser.cs @@ -14,21 +14,13 @@ protected override IEnumerable ParseInternal protected override ReplyPost Convert(Reply inPost) { - var o = new ReplyPost - { - Content = null!, // will get mutated by SimplifyImagesInReplyContent() - ContentsProtoBuf = inPost.Content - }; + var o = new ReplyPost {ContentsProtoBuf = inPost.Content}; try { o.Pid = inPost.Pid; o.Floor = inPost.Floor; SimplifyImagesInReplyContent(logger, ref inPost); - o.Content = new() - { - Pid = inPost.Pid, - ProtoBufBytes = Helper.SerializedProtoBufWrapperOrNullIfEmpty(inPost.Content, Helper.WrapPostContent) - }; + o.Content = Helper.SerializedProtoBufWrapperOrNullIfEmpty(inPost.Content, Helper.WrapPostContent); // AuthorId rarely respond with 0, Author should always be null with no guarantee o.AuthorUid = inPost.AuthorId.NullIfZero() ?? inPost.Author?.Uid ?? 0; diff --git a/c#/crawler/src/Tieba/Crawl/Parser/Post/SubReplyParser.cs b/c#/crawler/src/Tieba/Crawl/Parser/Post/SubReplyParser.cs index 19c065f8..a6771c05 100644 --- a/c#/crawler/src/Tieba/Crawl/Parser/Post/SubReplyParser.cs +++ b/c#/crawler/src/Tieba/Crawl/Parser/Post/SubReplyParser.cs @@ -13,19 +13,12 @@ protected override IEnumerable ParseInternal protected override SubReplyPost Convert(SubReply inPost) { - var o = new SubReplyPost - { - Content = new() - { - Spid = inPost.Spid, - ProtoBufBytes = Helper.SerializedProtoBufWrapperOrNullIfEmpty(inPost.Content, Helper.WrapPostContent) - }, - ContentsProtoBuf = inPost.Content - }; + var o = new SubReplyPost {ContentsProtoBuf = inPost.Content}; try { var author = inPost.Author; o.Spid = inPost.Spid; + o.Content = Helper.SerializedProtoBufWrapperOrNullIfEmpty(inPost.Content, Helper.WrapPostContent); o.AuthorUid = author.Uid; o.AuthorExpGrade = (byte)author.LevelId; o.PostedAt = inPost.Time; diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs index 0d53094e..1df7b72a 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs @@ -17,6 +17,8 @@ public override SaverChangeSet Save(CrawlerDbContext db) r => new ReplyRevision {TakenAt = r.UpdatedAt ?? r.CreatedAt, Pid = r.Pid}, LinqKit.PredicateBuilder.New(r => Posts.Keys.Contains(r.Pid))); + db.ReplyContents.AddRange(changeSet.NewlyAdded + .Select(r => new ReplyContent {Pid = r.Pid, ProtoBufBytes = r.Content})); PostSaveHandlers += replyContentImageSaver.Save(db, changeSet.NewlyAdded).Invoke; PostSaveHandlers += AuthorRevisionSaver.SaveAuthorExpGradeRevisions(db, changeSet.AllAfter).Invoke; PostSaveHandlers += replySignatureSaver.Save(db, changeSet.AllAfter).Invoke; diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs index 7ed1fa80..339252c1 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs @@ -14,6 +14,9 @@ public override SaverChangeSet Save(CrawlerDbContext db) var changeSet = Save(db, sr => sr.Spid, sr => new SubReplyRevision {TakenAt = sr.UpdatedAt ?? sr.CreatedAt, Spid = sr.Spid}, LinqKit.PredicateBuilder.New(sr => Posts.Keys.Contains(sr.Spid))); + + db.SubReplyContents.AddRange(changeSet.NewlyAdded.Select(sr => + new SubReplyContent {Spid = sr.Spid, ProtoBufBytes = sr.Content})); PostSaveHandlers += AuthorRevisionSaver.SaveAuthorExpGradeRevisions(db, changeSet.AllAfter).Invoke; return changeSet;