Skip to content

Commit

Permalink
feat(metrics): add metrics (#79)
Browse files Browse the repository at this point in the history
* include and expose default metrics

* update dependencies

* add metrics for scheduled lifetime tasks and for the docker registry client

* add more metrics

* add metrics to deploymentmanager

* add metrics to deploymentmanager

* add more metrics to the k8s parts

* add websocket metrics

* fix metric names

* release name is human-readable

Co-authored-by: Tom Schoener <[email protected]>
  • Loading branch information
philmtd and tom-schoener authored Jul 11, 2022
1 parent bfd295a commit ce8a360
Show file tree
Hide file tree
Showing 26 changed files with 805 additions and 362 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ development versions of your software into Kubernetes via Helm to allow everybod
* You need Helm charts for each project you want to deploy. The charts need to be hosted in a chart registry.
* Currently we support standard Helm chart registries and Helm GCS
* The Docker image tag and the image pull policy need to be configurable
* O-Neko works with kubernetes versions 1.10.0 - 1.22.1 (these versions are officially supported by the Kubernetes client library we use)
* O-Neko works with Kubernetes versions 1.10.0 - 1.23.3 (these versions are *officially* supported by the Kubernetes client library we use)

## How does it work?

Expand Down
35 changes: 20 additions & 15 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<spring.boot.version>2.4.1</spring.boot.version>
<feign.version>10.11</feign.version>
<org.mapstruct.version>1.4.1.Final</org.mapstruct.version>
<org.projectlombok.lombok.version>1.18.16</org.projectlombok.lombok.version>
<springdoc.version>1.5.10</springdoc.version>
<spring.boot.version>2.7.0</spring.boot.version>
<feign.version>11.8</feign.version>
<org.mapstruct.version>1.5.2.Final</org.mapstruct.version>
<org.projectlombok.lombok.version>1.18.24</org.projectlombok.lombok.version>
<springdoc.version>1.6.9</springdoc.version>
<branchName></branchName>
<buildNumber></buildNumber>
</properties>
Expand Down Expand Up @@ -69,6 +69,11 @@
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-security</artifactId>
</dependency>
<dependency>
<groupId>io.micrometer</groupId>
<artifactId>micrometer-registry-prometheus</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>io.github.openfeign</groupId>
<artifactId>feign-core</artifactId>
Expand Down Expand Up @@ -115,7 +120,7 @@
<dependency>
<groupId>com.github.ben-manes.caffeine</groupId>
<artifactId>caffeine</artifactId>
<version>2.8.8</version>
<version>3.1.1</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
Expand All @@ -127,7 +132,7 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.11</version>
<version>3.12.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
Expand All @@ -142,34 +147,34 @@
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.8.0</version>
<version>2.11.0</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.8.9</version>
<version>2.9.0</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.yaml</groupId>
<artifactId>snakeyaml</artifactId>
<version>1.27</version>
<version>1.30</version>
</dependency>
<dependency>
<groupId>de.flapdoodle.embed</groupId>
<artifactId>de.flapdoodle.embed.mongo</artifactId>
<version>3.0.0</version>
<version>3.4.6</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>30.1.1-jre</version>
<version>31.1-jre</version>
</dependency>
<dependency>
<groupId>io.fabric8</groupId>
<artifactId>kubernetes-client</artifactId>
<version>5.8.0</version>
<version>5.12.2</version>
</dependency>
<dependency>
<groupId>org.mapstruct</groupId>
Expand All @@ -179,7 +184,7 @@
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<version>3.18.1</version>
<version>3.23.1</version>
<scope>test</scope>
</dependency>
<dependency>
Expand All @@ -190,7 +195,7 @@
<dependency>
<groupId>net.logstash.logback</groupId>
<artifactId>logstash-logback-encoder</artifactId>
<version>6.6</version>
<version>7.2</version>
</dependency>
<dependency>
<groupId>org.springdoc</groupId>
Expand Down
75 changes: 54 additions & 21 deletions src/main/java/io/oneko/automations/ScheduledLifetimeController.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
package io.oneko.automations;

import static io.oneko.util.MoreStructuredArguments.projectKv;
import static io.oneko.util.MoreStructuredArguments.versionKv;
import static net.logstash.logback.argument.StructuredArguments.kv;
import static io.oneko.util.MoreStructuredArguments.*;
import static net.logstash.logback.argument.StructuredArguments.*;

import java.util.List;
import java.util.Optional;
Expand All @@ -15,52 +14,86 @@
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;

import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Timer;
import io.oneko.kubernetes.DeploymentManager;
import io.oneko.kubernetes.deployments.DeployableStatus;
import io.oneko.kubernetes.deployments.Deployment;
import io.oneko.kubernetes.deployments.DeploymentRepository;
import io.oneko.metrics.MetricNameBuilder;
import io.oneko.project.ProjectRepository;
import io.oneko.project.ProjectVersion;
import io.oneko.project.ReadableProject;
import io.oneko.project.WritableProjectVersion;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;

@Component
@Slf4j
@AllArgsConstructor
public class ScheduledLifetimeController {

private final LifetimeBehaviourService lifetimeBehaviourService;
private final ProjectRepository projectRepository;
private final DeploymentRepository deploymentRepository;
private final DeploymentManager deploymentManager;

private final Timer scheduledProjectCheckTimer;
private final Timer expiredDeploymentStopTimer;
private final Timer retrieveExpiredDeploymentsTimer;

public ScheduledLifetimeController(LifetimeBehaviourService lifetimeBehaviourService,
ProjectRepository projectRepository,
DeploymentRepository deploymentRepository,
DeploymentManager deploymentManager,
MeterRegistry meterRegistry) {
this.lifetimeBehaviourService = lifetimeBehaviourService;
this.projectRepository = projectRepository;
this.deploymentRepository = deploymentRepository;
this.deploymentManager = deploymentManager;

this.scheduledProjectCheckTimer = Timer.builder(new MetricNameBuilder().durationOf("lifetime.scheduled.checkProjects").build())
.description("the time it takes O-Neko to check all projects for versions which have a lifetime configuration which needs to be checked")
.publishPercentileHistogram()
.register(meterRegistry);
this.retrieveExpiredDeploymentsTimer = Timer.builder(new MetricNameBuilder().durationOf("lifetime.scheduled.deployments.retrieveExpired").build())
.description("the time it takes O-Neko to filter and retrieve expired deployments")
.publishPercentileHistogram()
.register(meterRegistry);
this.expiredDeploymentStopTimer = Timer.builder(new MetricNameBuilder().durationOf("lifetime.scheduled.deployments.stopExpired").build())
.description("the time it takes O-Neko to stop an individual expired deployment")
.publishPercentileHistogram()
.register(meterRegistry);
}

/**
 * Scheduled job (fixed rate of 5 * 60000 ms) that collects every project version accepted by
 * {@link #shouldConsiderVersion} across all projects and passes the result to
 * {@link #stopExpiredDeployments}.
 * <p>
 * Only the collection phase is recorded in {@code scheduledProjectCheckTimer}: the sample is
 * stopped before the expired deployments are processed.
 */
@Scheduled(fixedRate = 5 * 60000)
public void checkProjects() {
final List<ProjectVersion<?,?>> versions = projectRepository.getAll().stream()
final var sample = Timer.start();
final List<ProjectVersion<?, ?>> versions = projectRepository.getAll().stream()
.map(ReadableProject::writable)
.flatMap(project -> project.getVersions().stream())
.filter(this::shouldConsiderVersion)
.collect(Collectors.toList());

// stop the sample here so the stop phase below is not part of this timer
sample.stop(scheduledProjectCheckTimer);
// the callback is invoked for each expired version right before it is stopped
stopExpiredDeployments(versions,
projectVersion -> log.info("deployment expired ({}, {})", versionKv(projectVersion), projectKv(projectVersion.getProject())));
}

private void stopExpiredDeployments(List<ProjectVersion<?,?>> deployables, Consumer<ProjectVersion<?,?>> beforeStopDeployment) {
private void stopExpiredDeployments(List<ProjectVersion<?, ?>> deployables, Consumer<ProjectVersion<?, ?>> beforeStopDeployment) {
final Timer.Sample retrieveDeploymentsStart = Timer.start();
final var deployments = getRelevantDeploymentsFor(deployables);
final var expiredPairsOfDeployableAndDeployment = getExpiredPairsOfDeployableAndDeployment(deployables, deployments);

expiredPairsOfDeployableAndDeployment.forEach(expiredVersionDeploymentPair -> {
final var projectVersion = expiredVersionDeploymentPair.getLeft();
beforeStopDeployment.accept(projectVersion);
if (projectVersion instanceof WritableProjectVersion) {
deploymentManager.stopDeployment((WritableProjectVersion) projectVersion);
} else {
log.error("stopping is not supported ({})", kv("class_name", projectVersion.getClass()));
}
});
retrieveDeploymentsStart.stop(retrieveExpiredDeploymentsTimer);

// Time every stop individually: record(...) has to wrap the work done per element.
// Handing record(() -> consumer) to forEach resolved to Timer.record(Supplier) and therefore
// only measured the one-off creation of the consumer, leaving the actual stop calls untimed.
expiredPairsOfDeployableAndDeployment.forEach(expiredVersionDeploymentPair ->
    expiredDeploymentStopTimer.record(() -> {
        final var projectVersion = expiredVersionDeploymentPair.getLeft();
        // give the caller the chance to react (e.g. log) before the deployment is stopped
        beforeStopDeployment.accept(projectVersion);
        if (projectVersion instanceof WritableProjectVersion) {
            deploymentManager.stopDeployment((WritableProjectVersion) projectVersion);
        } else {
            // only writable versions can be stopped; report anything else
            log.error("stopping is not supported ({})", kv("class_name", projectVersion.getClass()));
        }
    })
);
}

private boolean shouldConsiderVersion(ProjectVersion<?, ?> version) {
Expand All @@ -72,14 +105,14 @@ private boolean shouldConsider(Optional<LifetimeBehaviour> behaviour) {
return behaviour.isPresent() && !behaviour.get().isInfinite();
}

private List<Deployment> getRelevantDeploymentsFor(List<ProjectVersion<?,?>> deployables) {
/**
 * Looks up the deployments that belong to the given versions and drops those whose status
 * equals {@code DeployableStatus.NotScheduled}.
 *
 * @param deployables versions whose ids are used for the repository lookup
 * @return the remaining deployments as a list
 */
private List<Deployment> getRelevantDeploymentsFor(List<ProjectVersion<?, ?>> deployables) {
    final var versionIds = deployables.stream()
            .map(ProjectVersion::getId)
            .collect(Collectors.toSet());
    final var candidates = deploymentRepository.findAllByProjectVersionIdIn(versionIds);
    return candidates.stream()
            .filter(candidate -> {
                final boolean notScheduled = candidate.getStatus().equals(DeployableStatus.NotScheduled);
                return !notScheduled;
            })
            .collect(Collectors.toList());
}

private Set<Pair<ProjectVersion<?,?>, Deployment>> getExpiredPairsOfDeployableAndDeployment(List<ProjectVersion<?,?>> versions, List<Deployment> deployments) {
private Set<Pair<ProjectVersion<?, ?>, Deployment>> getExpiredPairsOfDeployableAndDeployment(List<ProjectVersion<?, ?>> versions, List<Deployment> deployments) {
var combiningFunction = createExpiredDeployableDeploymentCombiningFunction(versions);
return deployments.stream()
.map(combiningFunction)
Expand All @@ -89,7 +122,7 @@ private Set<Pair<ProjectVersion<?,?>, Deployment>> getExpiredPairsOfDeployableAn
}

//what a method name
private Function<Deployment, Optional<Pair<ProjectVersion<?,?>, Deployment>>> createExpiredDeployableDeploymentCombiningFunction(List<ProjectVersion<?,?>> deployables) {
private Function<Deployment, Optional<Pair<ProjectVersion<?, ?>, Deployment>>> createExpiredDeployableDeploymentCombiningFunction(List<ProjectVersion<?, ?>> deployables) {
return (deployment) -> {

final var matchingDeployableOptional = deployables.stream()
Expand Down
20 changes: 19 additions & 1 deletion src/main/java/io/oneko/docker/DockerRegistryPolling.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

import com.google.common.collect.Sets;

import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Timer;
import io.oneko.docker.event.NewProjectVersionFoundEvent;
import io.oneko.docker.event.ObsoleteProjectVersionRemovedEvent;
import io.oneko.docker.v2.DockerRegistryClientFactory;
Expand All @@ -34,6 +36,7 @@
import io.oneko.event.EventTrigger;
import io.oneko.event.ScheduledTask;
import io.oneko.kubernetes.DeploymentManager;
import io.oneko.metrics.MetricNameBuilder;
import io.oneko.project.ProjectRepository;
import io.oneko.project.ProjectVersion;
import io.oneko.project.ReadableProject;
Expand All @@ -47,6 +50,8 @@
@Slf4j
class DockerRegistryPolling {



@Data
private static class VersionWithDockerManifest {
private final WritableProjectVersion version;
Expand All @@ -63,18 +68,29 @@ private static class VersionWithDockerManifest {
private final EventTrigger asTrigger;
private final ExpiringBucket<UUID> failedManifestRequests = new ExpiringBucket<UUID>(Duration.ofMinutes(5)).concurrent();
private final CurrentEventTrigger currentEventTrigger;
private final Timer pollingJobTimer;
private final Timer updateDatesJobTimer;

/**
 * Stores the injected collaborators, creates the {@code ScheduledTask} trigger named
 * "Docker Registry Polling" and registers two duration timers (polling job and image date
 * update job) with the given meter registry. Both timers publish a percentile histogram.
 *
 * @param projectRepository           kept in the field of the same name
 * @param dockerRegistryClientFactory kept in the field of the same name
 * @param deploymentManager           kept in the field of the same name
 * @param eventDispatcher             kept in the field of the same name
 * @param currentEventTrigger         kept in the field of the same name
 * @param meterRegistry               registry the timers are registered with; not stored itself
 */
DockerRegistryPolling(ProjectRepository projectRepository,
DockerRegistryClientFactory dockerRegistryClientFactory,
DeploymentManager deploymentManager,
EventDispatcher eventDispatcher,
CurrentEventTrigger currentEventTrigger) {
CurrentEventTrigger currentEventTrigger,
MeterRegistry meterRegistry) {
this.projectRepository = projectRepository;
this.dockerRegistryClientFactory = dockerRegistryClientFactory;
this.deploymentManager = deploymentManager;
this.eventDispatcher = eventDispatcher;
this.currentEventTrigger = currentEventTrigger;
this.asTrigger = new ScheduledTask("Docker Registry Polling");
// duration of one run of the polling job
this.pollingJobTimer = Timer.builder(new MetricNameBuilder().durationOf("docker.registry.polling.pollingJob").build())
.description("the duration of the docker polling job")
.publishPercentileHistogram()
.register(meterRegistry);
// duration of one run of the image date update job
this.updateDatesJobTimer = Timer.builder(new MetricNameBuilder().durationOf("docker.registry.polling.updateDatesJob").build())
.description("the duration of the image date update job")
.publishPercentileHistogram()
.register(meterRegistry);
}

@Scheduled(fixedDelay = 20000, initialDelay = 10000)
Expand All @@ -99,6 +115,7 @@ protected void updateAndRedeployAllIfRequired() {
}

log.trace("finished polling job ({})", kv("duration_millis", stopWatch.getTime()));
pollingJobTimer.record(Duration.ofMillis(stopWatch.getTime()));
}
}

Expand All @@ -121,6 +138,7 @@ protected void updateDatesForAllImagesAndAllTags() {
}

log.trace("finished updating dates for all projects ({})", kv("duration_millis", stopWatch.getTime()));
updateDatesJobTimer.record(Duration.ofMillis(stopWatch.getTime()));
}

/**
Expand Down
Loading

0 comments on commit ce8a360

Please sign in to comment.