Skip to content

Commit efb62cb

Browse files
committed
* fix disposing the DbContext created by dbContextDefaultFactory instead of the Func<Owned<>> factory itself
@ `DoWork()` + primary ctor param `replyContentImageSaver` to re-insert `ImageInReply` entities with image URL filenames from newly re-extracted reply contents @ SimplifyImagesInAllReplyContentsWorker.cs @ crawler * using async overload for `DbContext.SaveChangesAsync()` * add params for captured variables `processEntityCount` & `process` to fix ReSharper inspection `AccessToModifiedClosure` * renamed from `SaveAndLog()` @ `SaveThenLog()` + param `writingEntitiesAction` for `SaveThenLog()` & move param `writingEntityEntryAction` before it @ `Transform()` @ TransformEntityWorker.cs @ shared @ c#
1 parent 0362053 commit efb62cb

File tree

2 files changed

+28
-16
lines changed

2 files changed

+28
-16
lines changed

c#/crawler/src/Worker/SimplifyImagesInAllReplyContentsWorker.cs

+18-8
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,39 @@ namespace tbm.Crawler.Worker;
33
public class SimplifyImagesInAllReplyContentsWorker(
44
ILogger<SimplifyImagesInAllReplyContentsWorker> logger,
55
Func<Owned<CrawlerDbContext.NewDefault>> dbContextDefaultFactory,
6-
Func<Owned<CrawlerDbContext.New>> dbContextFactory)
6+
Func<Owned<CrawlerDbContext.New>> dbContextFactory,
7+
ReplyContentImageSaver replyContentImageSaver)
78
: TransformEntityWorker<CrawlerDbContext, ReplyContent, ReplyContent, Pid>(logger)
89
{
910
protected override async Task DoWork(CancellationToken stoppingToken)
1011
{
11-
await using var db = dbContextDefaultFactory().Value();
12+
await using var dbDefaultFactory = dbContextDefaultFactory();
13+
var db = dbDefaultFactory.Value();
1214
foreach (var fid in from e in db.Forums select e.Fid)
1315
{
16+
await using var dbFactory = dbContextFactory();
1417
await Transform(
15-
() => dbContextFactory().Value(fid),
18+
() => dbFactory.Value(fid),
1619
saveByNthEntityCount: 10000,
17-
writingEntityEntry =>
18-
{
19-
var p = writingEntityEntry.Property(e => e.ProtoBufBytes);
20-
p.IsModified = !ByteArrayEqualityComparer.Instance.Equals(p.OriginalValue, p.CurrentValue);
21-
},
2220
readingEntity => readingEntity.Pid,
2321
readingEntity =>
2422
{
2523
var protoBuf = Reply.Parser.ParseFrom(readingEntity.ProtoBufBytes);
2624
ReplyParser.SimplifyImagesInReplyContent(logger, ref protoBuf);
2725
return new() {Pid = readingEntity.Pid, ProtoBufBytes = protoBuf.ToByteArray()};
2826
},
27+
writingEntityEntry =>
28+
{
29+
var p = writingEntityEntry.Property(e => e.ProtoBufBytes);
30+
p.IsModified = !ByteArrayEqualityComparer.Instance.Equals(p.OriginalValue, p.CurrentValue);
31+
},
32+
(writingDb, writingEntities) => replyContentImageSaver
33+
.Save(writingDb, writingEntities.Select(e => new ReplyPost
34+
{
35+
Pid = e.Pid,
36+
Content = null!,
37+
ContentsProtoBuf = Reply.Parser.ParseFrom(e.ProtoBufBytes).Content
38+
})),
2939
stoppingToken);
3040
}
3141
}

c#/shared/src/TransformEntityWorker.cs

+10-8
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@ public abstract class TransformEntityWorker<TDbContext, TReadingEntity, TWriting
2828
protected async Task Transform(
2929
Func<TDbContext> dbContextFactory,
3030
int saveByNthEntityCount,
31-
Action<EntityEntry<TWritingEntity>> writingEntityEntryAction,
3231
Func<TReadingEntity, TExceptionId> readingEntityExceptionIdSelector,
3332
Func<TReadingEntity, TWritingEntity> entityTransformer,
33+
Action<EntityEntry<TWritingEntity>> writingEntityEntryAction,
34+
Action<TDbContext, IEnumerable<TWritingEntity>> writingEntitiesAction,
3435
CancellationToken stoppingToken = default)
3536
{
3637
var processedEntityCount = 0;
@@ -45,26 +46,27 @@ protected async Task Transform(
4546
from e in readingDb.Set<TReadingEntity>().AsNoTracking() select e;
4647
var writingEntities = new List<TWritingEntity>();
4748

48-
void SaveAndLog()
49+
async Task SaveThenLog(int processedCount, Process currentProcess)
4950
{
50-
writingDb.Set<TWritingEntity>().UpdateRange(writingEntities);
51+
writingDb.Set<TWritingEntity>().AttachRange(writingEntities);
5152
writingDb.ChangeTracker.Entries<TWritingEntity>().ForEach(writingEntityEntryAction);
52-
var updatedEntityCount = writingDb.SaveChanges();
53+
writingEntitiesAction(writingDb, writingEntities);
54+
var updatedEntityCount = await writingDb.SaveChangesAsync(stoppingToken);
5355
writingEntities.Clear();
5456
writingDb.ChangeTracker.Clear();
5557

5658
logger.LogTrace("processedEntityCount:{} updatedEntityCount:{} elapsed:{}ms processMemory:{}MiB exceptions:{}",
57-
processedEntityCount, updatedEntityCount,
59+
processedCount, updatedEntityCount,
5860
stopwatch.ElapsedMilliseconds,
59-
process.PrivateMemorySize64 / 1024 / 1024,
61+
currentProcess.PrivateMemorySize64 / 1024 / 1024,
6062
JsonSerializer.Serialize(exceptions, JsonSerializerOptions));
6163
stopwatch.Restart();
6264
}
6365

6466
foreach (var readingEntity in readingEntities)
6567
{
6668
processedEntityCount++;
67-
if (processedEntityCount % saveByNthEntityCount == 0) SaveAndLog();
69+
if (processedEntityCount % saveByNthEntityCount == 0) await SaveThenLog(processedEntityCount, process);
6870
if (stoppingToken.IsCancellationRequested) break;
6971
try
7072
{
@@ -85,6 +87,6 @@ void SaveAndLog()
8587
}
8688
}
8789

88-
SaveAndLog();
90+
await SaveThenLog(processedEntityCount, process);
8991
}
9092
}

0 commit comments

Comments
 (0)