Skip to content

Commit

Permalink
[chore/performance] Update media prune logic, add extra CLI command (#…
Browse files Browse the repository at this point in the history
…1474)

* start updating media prune stuff a wee bit

* continue prune / uncache work

* more tidying + consistency stuff

* add prune CLI command

* docs

* arg
  • Loading branch information
tsmethurst authored Feb 11, 2023
1 parent 70739d3 commit 40bc03e
Show file tree
Hide file tree
Showing 31 changed files with 1,112 additions and 1,089 deletions.
78 changes: 78 additions & 0 deletions cmd/gotosocial/action/admin/media/prune/common.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors [email protected]
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package prune

import (
"context"
"fmt"

"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/db/bundb"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/state"
gtsstorage "github.com/superseriousbusiness/gotosocial/internal/storage"
)

// prune bundles the services shared by the media prune CLI actions,
// so that they can be created together by setupPrune and released
// together by shutdown.
type prune struct {
// dbService is the database service created by bundb.NewBunDBService.
dbService db.DB
// storage is the storage driver created by gtsstorage.AutoConfig.
storage *gtsstorage.Driver
// manager is the media manager built on top of dbService and storage.
manager media.Manager
}

// setupPrune creates the database service, storage backend and media
// manager needed by the prune actions, and returns them bundled together.
//
// If a later step fails, anything that was already opened is closed again
// before returning, so a failed setup does not leave the database
// connection or the storage backend open. On error the returned *prune
// is always nil.
func setupPrune(ctx context.Context) (*prune, error) {
	var state state.State
	state.Caches.Init()

	dbService, err := bundb.NewBunDBService(ctx, &state)
	if err != nil {
		return nil, fmt.Errorf("error creating dbservice: %w", err)
	}

	storage, err := gtsstorage.AutoConfig() //nolint:contextcheck
	if err != nil {
		// Best-effort cleanup: the creation error is the one worth
		// reporting, so a failure to stop the db is deliberately dropped.
		_ = dbService.Stop(ctx)
		return nil, fmt.Errorf("error creating storage backend: %w", err)
	}

	manager, err := media.NewManager(dbService, storage) //nolint:contextcheck
	if err != nil {
		// Best-effort cleanup, mirroring the order used by shutdown.
		_ = storage.Close()
		_ = dbService.Stop(ctx)
		return nil, fmt.Errorf("error instantiating mediamanager: %w", err)
	}

	return &prune{
		dbService: dbService,
		storage:   storage,
		manager:   manager,
	}, nil
}

// shutdown releases the resources held by p: it closes the storage
// backend, stops the database service and stops the media manager,
// in that order.
//
// Every component is asked to close even if an earlier one failed, so a
// single failure does not leave the remaining components open. If exactly
// one step fails, its wrapped error is returned unchanged. If several
// fail, the first error is wrapped and the others are appended to the
// message. Returns nil when everything closed cleanly.
func (p *prune) shutdown(ctx context.Context) error {
	var errs []error

	if err := p.storage.Close(); err != nil {
		errs = append(errs, fmt.Errorf("error closing storage backend: %w", err))
	}

	if err := p.dbService.Stop(ctx); err != nil {
		errs = append(errs, fmt.Errorf("error closing dbservice: %w", err))
	}

	if err := p.manager.Stop(); err != nil {
		errs = append(errs, fmt.Errorf("error closing media manager: %w", err))
	}

	switch len(errs) {
	case 0:
		return nil
	case 1:
		return errs[0]
	default:
		// Only one %w is used so this also builds on Go versions
		// that predate multi-error wrapping.
		return fmt.Errorf("%w (further shutdown errors: %v)", errs[0], errs[1:])
	}
}
37 changes: 6 additions & 31 deletions cmd/gotosocial/action/admin/media/prune/orphaned.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,53 +24,28 @@ import (

"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/db/bundb"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/state"
gtsstorage "github.com/superseriousbusiness/gotosocial/internal/storage"
)

// Orphaned prunes orphaned media from storage.
var Orphaned action.GTSAction = func(ctx context.Context) error {
var state state.State
state.Caches.Init()

dbService, err := bundb.NewBunDBService(ctx, &state)
if err != nil {
return fmt.Errorf("error creating dbservice: %s", err)
}

storage, err := gtsstorage.AutoConfig()
prune, err := setupPrune(ctx)
if err != nil {
return fmt.Errorf("error creating storage backend: %w", err)
}

manager, err := media.NewManager(dbService, storage)
if err != nil {
return fmt.Errorf("error instantiating mediamanager: %s", err)
return err
}

dry := config.GetAdminMediaPruneDryRun()

pruned, err := manager.PruneOrphaned(ctx, dry)
pruned, err := prune.manager.PruneOrphaned(ctx, dry)
if err != nil {
return fmt.Errorf("error pruning: %s", err)
}

if dry /* dick heyyoooooo */ {
log.Infof("DRY RUN: %d stored items are orphaned and eligible to be pruned", pruned)
log.Infof("DRY RUN: %d items are orphaned and eligible to be pruned", pruned)
} else {
log.Infof("%d stored items were orphaned and pruned", pruned)
}

if err := storage.Close(); err != nil {
return fmt.Errorf("error closing storage backend: %w", err)
}

if err := dbService.Stop(ctx); err != nil {
return fmt.Errorf("error closing dbservice: %s", err)
log.Infof("%d orphaned items were pruned", pruned)
}

return nil
return prune.shutdown(ctx)
}
58 changes: 58 additions & 0 deletions cmd/gotosocial/action/admin/media/prune/remote.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
GoToSocial
Copyright (C) 2021-2023 GoToSocial Authors [email protected]
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package prune

import (
"context"
"fmt"

"github.com/superseriousbusiness/gotosocial/cmd/gotosocial/action"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/log"
)

// Remote prunes old and/or unused remote media.
//
// It runs two passes through the media manager: PruneUnusedRemote, then
// UncacheRemote with the configured number of remote cache days. The
// dry-run setting is passed to both passes. The number of items reported
// in the log is the sum of both passes, in dry-run and real runs alike.
var Remote action.GTSAction = func(ctx context.Context) error {
	prune, err := setupPrune(ctx)
	if err != nil {
		return err
	}

	dry := config.GetAdminMediaPruneDryRun()

	pruned, err := prune.manager.PruneUnusedRemote(ctx, dry)
	if err != nil {
		return fmt.Errorf("error pruning: %w", err)
	}

	uncached, err := prune.manager.UncacheRemote(ctx, config.GetMediaRemoteCacheDays(), dry)
	if err != nil {
		return fmt.Errorf("error pruning: %w", err)
	}

	total := pruned + uncached

	if dry {
		log.Infof("DRY RUN: %d remote items are unused/stale and eligible to be pruned", total)
	} else {
		// Report the combined total so that uncached (stale) items
		// are counted here just as they are in the dry-run message.
		log.Infof("%d unused/stale remote items were pruned", total)
	}

	return prune.shutdown(ctx)
}
5 changes: 0 additions & 5 deletions cmd/gotosocial/action/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,11 +231,6 @@ var Start action.GTSAction = func(ctx context.Context) error {
return fmt.Errorf("error starting gotosocial service: %s", err)
}

// perform initial media prune in case value of MediaRemoteCacheDays changed
if err := processor.AdminMediaPrune(ctx, config.GetMediaRemoteCacheDays()); err != nil {
return fmt.Errorf("error during initial media prune: %s", err)
}

// catch shutdown signals from the operating system
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
Expand Down
14 changes: 13 additions & 1 deletion cmd/gotosocial/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,6 @@ func adminCommands() *cobra.Command {
Use: "prune",
Short: "admin commands for pruning unused/orphaned media from storage",
}
config.AddAdminMediaPrune(adminMediaPruneCmd)

adminMediaPruneOrphanedCmd := &cobra.Command{
Use: "orphaned",
Expand All @@ -184,6 +183,19 @@ func adminCommands() *cobra.Command {
config.AddAdminMediaPrune(adminMediaPruneOrphanedCmd)
adminMediaPruneCmd.AddCommand(adminMediaPruneOrphanedCmd)

adminMediaPruneRemoteCmd := &cobra.Command{
Use: "remote",
Short: "prune unused/stale remote media from storage, older than given number of days",
PreRunE: func(cmd *cobra.Command, args []string) error {
return preRun(preRunArgs{cmd: cmd})
},
RunE: func(cmd *cobra.Command, args []string) error {
return run(cmd.Context(), prune.Remote)
},
}
config.AddAdminMediaPrune(adminMediaPruneRemoteCmd)
adminMediaPruneCmd.AddCommand(adminMediaPruneRemoteCmd)

adminMediaCmd.AddCommand(adminMediaPruneCmd)

adminCmd.AddCommand(adminMediaCmd)
Expand Down
37 changes: 37 additions & 0 deletions docs/admin/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -287,3 +287,40 @@ Example (for real):
```bash
gotosocial admin media prune orphaned --dry-run=false
```

### gotosocial admin media prune remote

This command can be used to prune unused/stale remote media from your GoToSocial instance.

Stale media means avatars/headers/status attachments from remote instances that are older than `media-remote-cache-days`.

These items will be refetched later on demand, if necessary.

Unused media means avatars/headers/status attachments which are not currently in use by an account or status.

**This command only works when GoToSocial is not running, since it acquires an exclusive lock on storage. Stop GoToSocial before running this command!**

```text
prune unused/stale remote media from storage, older than given number of days
Usage:
gotosocial admin media prune remote [flags]
Flags:
--dry-run perform a dry run and only log number of items eligible for pruning (default true)
-h, --help help for remote
```

By default, this command performs a dry run, which will log how many items can be pruned. To do it for real, add `--dry-run=false` to the command.

Example (dry run):

```bash
gotosocial admin media prune remote
```

Example (for real):

```bash
gotosocial admin media prune remote --dry-run=false
```
2 changes: 1 addition & 1 deletion docs/api/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3608,7 +3608,7 @@ paths:
- application/json
- application/xml
- application/x-www-form-urlencoded
description: Also cleans up unused headers + avatars from the media cache.
description: Also cleans up unused headers + avatars from the media cache and prunes orphaned items from storage.
operationId: mediaCleanup
parameters:
- description: |-
Expand Down
3 changes: 2 additions & 1 deletion internal/api/client/admin/mediacleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ import (
// MediaCleanupPOSTHandler swagger:operation POST /api/v1/admin/media_cleanup mediaCleanup
//
// Clean up remote media older than the specified number of days.
// Also cleans up unused headers + avatars from the media cache.
//
// Also cleans up unused headers + avatars from the media cache and prunes orphaned items from storage.
//
// ---
// tags:
Expand Down
2 changes: 2 additions & 0 deletions internal/config/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,6 @@ var Defaults = Configuration{
UserSweepFreq: time.Second * 10,
},
},

AdminMediaPruneDryRun: true,
}
46 changes: 40 additions & 6 deletions internal/db/bundb/media.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,23 @@ func (m *mediaDB) GetRemoteOlderThan(ctx context.Context, olderThan time.Time, l
return attachments, nil
}

// CountRemoteOlderThan counts rows in the media_attachments table that are
// marked as cached, were created before olderThan, and whose remote_url
// passes the whereNotEmptyAndNotNull filter.
//
// On a query failure it returns 0 together with the error as processed
// by the underlying connection.
func (m *mediaDB) CountRemoteOlderThan(ctx context.Context, olderThan time.Time) (int, db.Error) {
	query := m.conn.NewSelect()
	query = query.TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment"))
	query = query.Column("media_attachment.id")

	// Filters: still cached, older than the cutoff, and remote.
	query = query.Where("? = ?", bun.Ident("media_attachment.cached"), true)
	query = query.Where("? < ?", bun.Ident("media_attachment.created_at"), olderThan)
	query = query.WhereGroup(" AND ", whereNotEmptyAndNotNull("media_attachment.remote_url"))

	total, err := query.Count(ctx)
	if err == nil {
		return total, nil
	}

	return 0, m.conn.ProcessError(err)
}

func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, db.Error) {
attachments := []*gtsmodel.MediaAttachment{}

Expand All @@ -98,7 +115,7 @@ func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit
return attachments, nil
}

func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, maxID string, limit int) ([]*gtsmodel.MediaAttachment, db.Error) {
func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, db.Error) {
attachments := []*gtsmodel.MediaAttachment{}

q := m.newMediaQ(&attachments).
Expand All @@ -107,11 +124,8 @@ func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan tim
Where("? = ?", bun.Ident("media_attachment.header"), false).
Where("? < ?", bun.Ident("media_attachment.created_at"), olderThan).
Where("? IS NULL", bun.Ident("media_attachment.remote_url")).
Where("? IS NULL", bun.Ident("media_attachment.status_id"))

if maxID != "" {
q = q.Where("? < ?", bun.Ident("media_attachment.id"), maxID)
}
Where("? IS NULL", bun.Ident("media_attachment.status_id")).
Order("media_attachment.created_at DESC")

if limit != 0 {
q = q.Limit(limit)
Expand All @@ -123,3 +137,23 @@ func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan tim

return attachments, nil
}

// CountLocalUnattachedOlderThan counts rows in the media_attachments table
// that are marked as cached, are neither an avatar nor a header, were
// created before olderThan, and have both remote_url and status_id NULL.
//
// On a query failure it returns 0 together with the error as processed
// by the underlying connection.
func (m *mediaDB) CountLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time) (int, db.Error) {
	query := m.conn.NewSelect()
	query = query.TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment"))
	query = query.Column("media_attachment.id")

	// Filters: cached, not an avatar or header, older than the cutoff.
	query = query.Where("? = ?", bun.Ident("media_attachment.cached"), true)
	query = query.Where("? = ?", bun.Ident("media_attachment.avatar"), false)
	query = query.Where("? = ?", bun.Ident("media_attachment.header"), false)
	query = query.Where("? < ?", bun.Ident("media_attachment.created_at"), olderThan)

	// Filters: no remote URL and no owning status.
	query = query.Where("? IS NULL", bun.Ident("media_attachment.remote_url"))
	query = query.Where("? IS NULL", bun.Ident("media_attachment.status_id"))

	total, err := query.Count(ctx)
	if err == nil {
		return total, nil
	}

	return 0, m.conn.ProcessError(err)
}
2 changes: 1 addition & 1 deletion internal/db/bundb/media_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func (suite *MediaTestSuite) TestGetAvisAndHeaders() {
func (suite *MediaTestSuite) TestGetLocalUnattachedOlderThan() {
ctx := context.Background()

attachments, err := suite.db.GetLocalUnattachedOlderThan(ctx, testrig.TimeMustParse("2090-06-04T13:12:00Z"), "", 10)
attachments, err := suite.db.GetLocalUnattachedOlderThan(ctx, testrig.TimeMustParse("2090-06-04T13:12:00Z"), 10)
suite.NoError(err)
suite.Len(attachments, 1)
}
Expand Down
Loading

0 comments on commit 40bc03e

Please sign in to comment.