Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IGNITE-23079 Make Raft storages destruction durable #4987

Merged
merged 27 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions modules/raft/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ dependencies {
implementation project(':ignite-rocksdb-common')
implementation project(':ignite-metrics')
implementation project(':ignite-failure-handler')
implementation project(':ignite-vault')
implementation libs.jetbrains.annotations
implementation libs.fastutil.core
implementation libs.disruptor
Expand Down Expand Up @@ -98,6 +99,7 @@ dependencies {
integrationTestImplementation testFixtures(project(':ignite-raft'))
integrationTestImplementation testFixtures(project(':ignite-runner'))
integrationTestImplementation testFixtures(project(':ignite-sql-engine'))
integrationTestImplementation project(':ignite-vault')
integrationTestImplementation project(':ignite-raft-api')
integrationTestImplementation project(':ignite-failure-handler')
integrationTestImplementation project(':ignite-runner')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -838,9 +838,11 @@ private void doTestFollowerCatchUp(boolean snapshot, boolean cleanDir) throws Ex
assertThat(toStop.stopAsync(componentContext), willCompleteSuccessfully());
assertThat(serverServices.get(stopIdx).stopAsync(componentContext), willCompleteSuccessfully());
assertThat(logStorageFactories.get(stopIdx).stopAsync(componentContext), willCompleteSuccessfully());
assertThat(vaultManagers.get(stopIdx).stopAsync(componentContext), willCompleteSuccessfully());
servers.remove(stopIdx);
serverServices.remove(stopIdx);
logStorageFactories.remove(stopIdx);
vaultManagers.remove(stopIdx);
serverWorkingDirs.remove(stopIdx);

applyIncrements(client1, 11, 20);
Expand Down Expand Up @@ -891,9 +893,11 @@ private void doTestFollowerCatchUp(boolean snapshot, boolean cleanDir) throws Ex
assertThat(svc2.stopAsync(componentContext), willCompleteSuccessfully());
assertThat(serverServices.get(sv2Idx).stopAsync(componentContext), willCompleteSuccessfully());
assertThat(logStorageFactories.get(sv2Idx).stopAsync(componentContext), willCompleteSuccessfully());
assertThat(vaultManagers.get(sv2Idx).stopAsync(componentContext), willCompleteSuccessfully());
servers.remove(sv2Idx);
serverServices.remove(sv2Idx);
logStorageFactories.remove(sv2Idx);
vaultManagers.remove(sv2Idx);
serverWorkingDirs.remove(sv2Idx);

var svc3 = startServer(stopIdx, r -> {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.ignite.raft.server;

import static org.apache.ignite.internal.testframework.IgniteTestUtils.assertThrows;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Objects;
import org.apache.ignite.internal.lang.IgniteInternalException;
import org.apache.ignite.internal.raft.Peer;
import org.apache.ignite.internal.raft.RaftNodeId;
import org.apache.ignite.internal.raft.server.RaftGroupOptions;
import org.apache.ignite.internal.raft.server.impl.JraftServerImpl;
import org.apache.ignite.internal.raft.storage.LogStorageFactory;
import org.apache.ignite.internal.raft.storage.impl.LogStorageException;
import org.apache.ignite.internal.replicator.TestReplicationGroupId;
import org.junit.jupiter.api.Test;

/** Tests that check that failed storage destruction is finished after server restart. */
public class ItJraftDestructorTest extends JraftAbstractTest {
private static final int SERVER_INDEX = 0;

@Test
void testFinishStorageDestructionAfterRestart() throws Exception {
doTestFinishStorageDestructionAfterRestart(false);

// New log storage factory was created after restart.
verify(logStorageFactories.get(SERVER_INDEX), times(1)).destroyLogStorage(anyString());
}

@Test
void testVolatileLogStorageIsNotDestroyedOnRestart() throws Exception {
doTestFinishStorageDestructionAfterRestart(true);

// New log storage factory was created after restart.
verify(logStorageFactories.get(SERVER_INDEX), never()).destroyLogStorage(anyString());
}

private void doTestFinishStorageDestructionAfterRestart(boolean isVolatile) throws Exception {
JraftServerImpl server = startServer(SERVER_INDEX);
Path serverDataPath = serverWorkingDirs.get(SERVER_INDEX).basePath();

RaftNodeId nodeId = getRaftNodeId(server);

Path nodeDataPath = createServerDataPathForNode(serverDataPath, nodeId);

// Log storage destruction must fail, so raft server will save the intent to destroy the storage
// and will complete it successfully on restart.
LogStorageFactory logStorageFactory = logStorageFactories.get(SERVER_INDEX);
doThrow(LogStorageException.class).doCallRealMethod().when(logStorageFactory).destroyLogStorage(anyString());

RaftGroupOptions groupOptions = getRaftGroupOptions(isVolatile, logStorageFactory, serverDataPath);

assertThrows(
IgniteInternalException.class,
() -> server.destroyRaftNodeStorages(nodeId, groupOptions),
"Failed to delete storage for node: "
);

verify(logStorageFactory, times(1)).destroyLogStorage(anyString());

// Node data path deletion happens after log storage destruction, so it should be intact.
assertTrue(Files.exists(nodeDataPath));

shutdownCluster();

startServer(SERVER_INDEX);

assertFalse(Files.exists(nodeDataPath));
}

private RaftNodeId getRaftNodeId(JraftServerImpl server) {
String localNodeName = server.clusterService().topologyService().localMember().name();
Peer peer = Objects.requireNonNull(initialMembersConf.peer(localNodeName));

return new RaftNodeId(new TestReplicationGroupId("test"), peer);
}

private static Path createServerDataPathForNode(Path serverDataPath, RaftNodeId nodeId) throws IOException {
Path nodeDataPath = JraftServerImpl.getServerDataPath(serverDataPath, nodeId);

Files.createDirectories(nodeDataPath);

return nodeDataPath;
}

private static RaftGroupOptions getRaftGroupOptions(boolean isVolatile, LogStorageFactory logStorageFactory, Path serverDataPath) {
RaftGroupOptions groupOptions = isVolatile ? RaftGroupOptions.forVolatileStores() : RaftGroupOptions.forPersistentStores();
groupOptions.setLogStorageFactory(logStorageFactory).serverDataPath(serverDataPath);
return groupOptions;
}

private JraftServerImpl startServer(int index) {
return startServer(index, x -> {}, opts -> {});
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,19 @@

import static java.util.stream.Collectors.collectingAndThen;
import static java.util.stream.Collectors.toSet;
import static org.apache.ignite.internal.configuration.IgnitePaths.vaultPath;
import static org.apache.ignite.internal.testframework.IgniteTestUtils.testNodeName;
import static org.apache.ignite.internal.testframework.matchers.CompletableFutureMatcher.willCompleteSuccessfully;
import static org.apache.ignite.raft.jraft.test.TestUtils.getLocalAddress;
import static org.apache.ignite.raft.jraft.test.TestUtils.waitForTopology;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.spy;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
Expand All @@ -45,16 +48,22 @@
import org.apache.ignite.internal.raft.RaftNodeId;
import org.apache.ignite.internal.raft.server.RaftServer;
import org.apache.ignite.internal.raft.server.TestJraftServerFactory;
import org.apache.ignite.internal.raft.server.impl.GroupStoragesContextResolver;
import org.apache.ignite.internal.raft.server.impl.JraftServerImpl;
import org.apache.ignite.internal.raft.service.RaftGroupService;
import org.apache.ignite.internal.raft.storage.GroupStoragesDestructionIntents;
import org.apache.ignite.internal.raft.storage.LogStorageFactory;
import org.apache.ignite.internal.raft.storage.impl.VaultGroupStoragesDestructionIntents;
import org.apache.ignite.internal.raft.util.SharedLogStorageFactoryUtils;
import org.apache.ignite.internal.raft.util.ThreadLocalOptimizedMarshaller;
import org.apache.ignite.internal.replicator.ReplicationGroupId;
import org.apache.ignite.internal.thread.NamedThreadFactory;
import org.apache.ignite.internal.util.IgniteUtils;
import org.apache.ignite.internal.vault.VaultManager;
import org.apache.ignite.internal.vault.persistence.PersistentVaultService;
import org.apache.ignite.network.NetworkAddress;
import org.apache.ignite.raft.jraft.option.NodeOptions;
import org.apache.ignite.raft.jraft.rpc.impl.RaftGroupEventsClientListener;
import org.apache.ignite.raft.jraft.test.TestUtils;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
Expand Down Expand Up @@ -90,6 +99,8 @@ public abstract class JraftAbstractTest extends RaftServerAbstractTest {

protected final List<LogStorageFactory> logStorageFactories = new ArrayList<>();

protected final List<VaultManager> vaultManagers = new ArrayList<>();

protected final List<ComponentWorkingDir> serverWorkingDirs = new ArrayList<>();

protected final List<ClusterService> serverServices = new ArrayList<>();
Expand Down Expand Up @@ -169,6 +180,9 @@ protected void shutdownCluster() throws Exception {

assertThat(IgniteUtils.stopAsync(new ComponentContext(), logStorageFactories), willCompleteSuccessfully());
logStorageFactories.clear();

assertThat(IgniteUtils.stopAsync(new ComponentContext(), vaultManagers), willCompleteSuccessfully());
vaultManagers.clear();
}

/**
Expand All @@ -188,10 +202,10 @@ protected JraftServerImpl startServer(int idx, Consumer<RaftServer> clo, Consume

serverWorkingDirs.add(workingDir);

LogStorageFactory partitionsLogStorageFactory = SharedLogStorageFactoryUtils.create(
LogStorageFactory partitionsLogStorageFactory = spy(SharedLogStorageFactoryUtils.create(
service.nodeName(),
workingDir.raftLogPath()
);
));

assertThat(partitionsLogStorageFactory.startAsync(new ComponentContext()), willCompleteSuccessfully());

Expand All @@ -201,7 +215,29 @@ protected JraftServerImpl startServer(int idx, Consumer<RaftServer> clo, Consume

optionsUpdater.accept(opts);

JraftServerImpl server = TestJraftServerFactory.create(service, opts);
VaultManager vaultManager = new VaultManager(new PersistentVaultService(vaultPath(workingDir.basePath())));

vaultManagers.add(vaultManager);

assertThat(vaultManager.startAsync(new ComponentContext()), willCompleteSuccessfully());

GroupStoragesDestructionIntents groupStoragesDestructionIntents = new VaultGroupStoragesDestructionIntents(vaultManager);

String groupName = "testGroupName";

GroupStoragesContextResolver groupStoragesContextResolver = new GroupStoragesContextResolver(
replicationGroupId -> groupName,
Map.of(groupName, workingDir.basePath()),
Map.of(groupName, partitionsLogStorageFactory)
);

JraftServerImpl server = TestJraftServerFactory.create(
service,
opts,
new RaftGroupEventsClientListener(),
groupStoragesDestructionIntents,
groupStoragesContextResolver
);

assertThat(server.startAsync(new ComponentContext()), willCompleteSuccessfully());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import static java.util.Objects.requireNonNullElse;
import static org.apache.ignite.internal.util.CompletableFutures.nullCompletedFuture;

import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ScheduledExecutorService;
Expand All @@ -45,9 +47,14 @@
import org.apache.ignite.internal.raft.configuration.VolatileRaftConfiguration;
import org.apache.ignite.internal.raft.server.RaftGroupOptions;
import org.apache.ignite.internal.raft.server.RaftServer;
import org.apache.ignite.internal.raft.server.impl.GroupStoragesContextResolver;
import org.apache.ignite.internal.raft.server.impl.JraftServerImpl;
import org.apache.ignite.internal.raft.service.RaftGroupListener;
import org.apache.ignite.internal.raft.service.RaftGroupService;
import org.apache.ignite.internal.raft.storage.GroupStoragesDestructionIntents;
import org.apache.ignite.internal.raft.storage.impl.NoopGroupStoragesDestructionIntents;
import org.apache.ignite.internal.raft.storage.impl.StorageDestructionIntent;
import org.apache.ignite.internal.raft.storage.impl.StoragesDestructionContext;
import org.apache.ignite.internal.raft.util.ThreadLocalOptimizedMarshaller;
import org.apache.ignite.internal.replicator.ReplicationGroupId;
import org.apache.ignite.internal.thread.NamedThreadFactory;
Expand Down Expand Up @@ -102,6 +109,28 @@ public class Loza implements RaftManager {

private final MetricManager metricManager;

/** Constructor using no-op group storages destruction intents. */
@TestOnly
public Loza(
ClusterService clusterService,
MetricManager metricManager,
RaftConfiguration raftConfiguration,
HybridClock hybridClock,
RaftGroupEventsClientListener raftGroupEventsClientListener,
FailureManager failureManager
) {
this(
clusterService,
metricManager,
raftConfiguration,
hybridClock,
raftGroupEventsClientListener,
failureManager,
new NoopGroupStoragesDestructionIntents(),
new GroupStoragesContextResolver(Objects::toString, Map.of(), Map.of())
);
}

/**
* The constructor.
*
Expand All @@ -110,14 +139,18 @@ public class Loza implements RaftManager {
* @param raftConfiguration Raft configuration.
* @param clock A hybrid logical clock.
* @param failureManager Failure processor that is used to handle critical errors.
* @param groupStoragesDestructionIntents Storage to persist {@link StorageDestructionIntent}s.
* @param groupStoragesContextResolver Resolver to get {@link StoragesDestructionContext}s for storage destruction.
*/
public Loza(
ClusterService clusterNetSvc,
MetricManager metricManager,
RaftConfiguration raftConfiguration,
HybridClock clock,
RaftGroupEventsClientListener raftGroupEventsClientListener,
FailureManager failureManager
FailureManager failureManager,
GroupStoragesDestructionIntents groupStoragesDestructionIntents,
GroupStoragesContextResolver groupStoragesContextResolver
) {
this.clusterNetSvc = clusterNetSvc;
this.raftConfiguration = raftConfiguration;
Expand All @@ -130,7 +163,14 @@ public Loza(

this.opts = options;

this.raftServer = new JraftServerImpl(clusterNetSvc, options, raftGroupEventsClientListener, failureManager);
this.raftServer = new JraftServerImpl(
clusterNetSvc,
options,
raftGroupEventsClientListener,
failureManager,
groupStoragesDestructionIntents,
groupStoragesContextResolver
);

this.executor = new ScheduledThreadPoolExecutor(
CLIENT_POOL_SIZE,
Expand Down
Loading