From c9d027d08ee3e13e3e3de8da22426f6ebac8b07c Mon Sep 17 00:00:00 2001
From: Ashok Siyani <ashok.siyani@gmail.com>
Date: Wed, 7 Aug 2024 12:46:43 +0100
Subject: [PATCH 1/6] remove unused metric

---
 .../module_controller_with_runner_test.go     |  1 -
 metrics/mock_prometheus.go                    | 12 ------
 metrics/prometheus.go                         | 39 +++----------------
 runner/tfexec.go                              | 31 +--------------
 4 files changed, 6 insertions(+), 77 deletions(-)

diff --git a/integration_test/module_controller_with_runner_test.go b/integration_test/module_controller_with_runner_test.go
index 31d97154..42416ed7 100644
--- a/integration_test/module_controller_with_runner_test.go
+++ b/integration_test/module_controller_with_runner_test.go
@@ -62,7 +62,6 @@ var _ = Describe("Module controller with Runner", func() {
 
 			testMetrics.EXPECT().UpdateModuleRunDuration(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes()
 			testMetrics.EXPECT().UpdateModuleSuccess(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes()
-			testMetrics.EXPECT().UpdateTerraformExitCodeCount(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes()
 			testMetrics.EXPECT().SetRunPending(gomock.Any(), gomock.Any(), gomock.Any()).AnyTimes()
 
 			// clear state file if exits
diff --git a/metrics/mock_prometheus.go b/metrics/mock_prometheus.go
index 2e85894f..b0a7b2a2 100644
--- a/metrics/mock_prometheus.go
+++ b/metrics/mock_prometheus.go
@@ -68,15 +68,3 @@ func (mr *MockPrometheusInterfaceMockRecorder) UpdateModuleSuccess(arg0, arg1, a
 	mr.mock.ctrl.T.Helper()
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateModuleSuccess", reflect.TypeOf((*MockPrometheusInterface)(nil).UpdateModuleSuccess), arg0, arg1, arg2, arg3)
 }
-
-// UpdateTerraformExitCodeCount mocks base method.
-func (m *MockPrometheusInterface) UpdateTerraformExitCodeCount(arg0, arg1, arg2 string, arg3 int) {
-	m.ctrl.T.Helper()
-	m.ctrl.Call(m, "UpdateTerraformExitCodeCount", arg0, arg1, arg2, arg3)
-}
-
-// UpdateTerraformExitCodeCount indicates an expected call of UpdateTerraformExitCodeCount.
-func (mr *MockPrometheusInterfaceMockRecorder) UpdateTerraformExitCodeCount(arg0, arg1, arg2, arg3 interface{}) *gomock.Call {
-	mr.mock.ctrl.T.Helper()
-	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateTerraformExitCodeCount", reflect.TypeOf((*MockPrometheusInterface)(nil).UpdateTerraformExitCodeCount), arg0, arg1, arg2, arg3)
-}
diff --git a/metrics/prometheus.go b/metrics/prometheus.go
index ef2bdcfa..cf668272 100644
--- a/metrics/prometheus.go
+++ b/metrics/prometheus.go
@@ -16,7 +16,6 @@ const (
 
 // PrometheusInterface allows for mocking out the functionality of Prometheus when testing the full process of an apply run.
 type PrometheusInterface interface {
-	UpdateTerraformExitCodeCount(string, string, string, int)
 	UpdateModuleSuccess(string, string, string, bool)
 	UpdateModuleRunDuration(string, string, string, float64, bool)
 	SetRunPending(string, string, bool)
@@ -29,12 +28,11 @@ type PrometheusInterface interface {
 // moduleRunSuccess is the last run outcome of the module run.
 // moduleRunning is the number of modules currently in running state.
 type Prometheus struct {
-	terraformExitCodeCount *prometheus.CounterVec
-	moduleRunCount         *prometheus.CounterVec
-	moduleRunDuration      *prometheus.HistogramVec
-	moduleRunPending       *prometheus.GaugeVec
-	moduleRunSuccess       *prometheus.GaugeVec
-	moduleRunTimestamp     *prometheus.GaugeVec
+	moduleRunCount     *prometheus.CounterVec
+	moduleRunDuration  *prometheus.HistogramVec
+	moduleRunPending   *prometheus.GaugeVec
+	moduleRunSuccess   *prometheus.GaugeVec
+	moduleRunTimestamp *prometheus.GaugeVec
 }
 
 // Init creates and registers the custom metrics for terraform-applier.
@@ -112,22 +110,6 @@ func (p *Prometheus) Init() {
 			"run_type",
 		},
 	)
-	p.terraformExitCodeCount = prometheus.NewCounterVec(prometheus.CounterOpts{
-		Namespace: metricsNamespace,
-		Name:      "module_terraform_exit_code_count",
-		Help:      "Count of terraform exit codes",
-	},
-		[]string{
-			// Name of the module that was ran
-			"module",
-			// Namespace name of the module that was ran
-			"namespace",
-			// plan, apply, init etc
-			"command",
-			// Exit code
-			"exit_code",
-		},
-	)
 
 	// Register custom metrics with the global prometheus registry
 	metrics.Registry.MustRegister(
@@ -136,21 +118,10 @@ func (p *Prometheus) Init() {
 		p.moduleRunSuccess,
 		p.moduleRunPending,
 		p.moduleRunTimestamp,
-		p.terraformExitCodeCount,
 	)
 
 }
 
-// UpdateTerraformExitCodeCount increments for each exit code returned by terraform
-func (p *Prometheus) UpdateTerraformExitCodeCount(module, namespace string, cmd string, code int) {
-	p.terraformExitCodeCount.With(prometheus.Labels{
-		"module":    module,
-		"namespace": namespace,
-		"command":   cmd,
-		"exit_code": strconv.Itoa(code),
-	}).Inc()
-}
-
 // UpdateModuleSuccess increments the given module's Counter for either successful or failed run attempts.
 func (p *Prometheus) UpdateModuleSuccess(module, namespace, runType string, success bool) {
 	if success {
diff --git a/runner/tfexec.go b/runner/tfexec.go
index f9dcd0d1..219d2b2a 100644
--- a/runner/tfexec.go
+++ b/runner/tfexec.go
@@ -4,15 +4,12 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
-	"errors"
 	"fmt"
 	"os"
-	"os/exec"
 	"path/filepath"
 
 	"github.com/hashicorp/terraform-exec/tfexec"
 	tfaplv1beta1 "github.com/utilitywarehouse/terraform-applier/api/v1beta1"
-	"github.com/utilitywarehouse/terraform-applier/metrics"
 	"github.com/utilitywarehouse/terraform-applier/sysutil"
 )
 
@@ -36,8 +33,7 @@ type tfRunner struct {
 	workingDir      string
 	planFileName    string
 
-	metrics metrics.PrometheusInterface
-	tf      *tfexec.Terraform
+	tf *tfexec.Terraform
 }
 
 func (r *Runner) NewTFRunner(
@@ -73,7 +69,6 @@ func (r *Runner) NewTFRunner(
 	tfr := &tfRunner{
 		moduleName:      module.Name,
 		moduleNamespace: module.Namespace,
-		metrics:         r.Metrics,
 		rootDir:         tmpRoot,
 		workingDir:      filepath.Join(tmpRoot, module.Spec.Path),
 		planFileName:    "plan.out",
@@ -174,14 +169,8 @@ func (te *tfRunner) init(ctx context.Context, backendConf map[string]string) (st
 	}
 
 	if err := te.tf.Init(ctx, opts...); err != nil {
-		if uerr := errors.Unwrap(err); uerr != nil {
-			if e, ok := uerr.(*exec.ExitError); ok {
-				te.metrics.UpdateTerraformExitCodeCount(te.moduleName, te.moduleNamespace, "init", e.ExitCode())
-			}
-		}
 		return out.String(), err
 	}
-	te.metrics.UpdateTerraformExitCodeCount(te.moduleName, te.moduleNamespace, "init", 0)
 
 	return out.String(), nil
 }
@@ -195,19 +184,8 @@ func (te *tfRunner) plan(ctx context.Context) (bool, string, error) {
 
 	changes, err := te.tf.Plan(ctx, tfexec.Out(planOut))
 	if err != nil {
-		if uerr := errors.Unwrap(err); uerr != nil {
-			if e, ok := uerr.(*exec.ExitError); ok {
-				te.metrics.UpdateTerraformExitCodeCount(te.moduleName, te.moduleNamespace, "plan", e.ExitCode())
-			}
-		}
 		return changes, out.String(), err
 	}
-	if changes {
-		te.metrics.UpdateTerraformExitCodeCount(te.moduleName, te.moduleNamespace, "plan", 2)
-	} else {
-		te.metrics.UpdateTerraformExitCodeCount(te.moduleName, te.moduleNamespace, "plan", 0)
-	}
-
 	return changes, out.String(), nil
 }
 
@@ -231,15 +209,8 @@ func (te *tfRunner) apply(ctx context.Context) (string, error) {
 	}
 
 	if err := te.tf.Apply(ctx, tfexec.DirOrPlan(planOut)); err != nil {
-		if uerr := errors.Unwrap(err); uerr != nil {
-			if e, ok := uerr.(*exec.ExitError); ok {
-				te.metrics.UpdateTerraformExitCodeCount(te.moduleName, te.moduleNamespace, "apply", e.ExitCode())
-			}
-		}
 		return out.String(), err
 	}
 
-	te.metrics.UpdateTerraformExitCodeCount(te.moduleName, te.moduleNamespace, "apply", 0)
-
 	return out.String(), nil
 }

From 624a89adaecca50722da147c69c57efbf05457bc Mon Sep 17 00:00:00 2001
From: Ashok Siyani <ashok.siyani@gmail.com>
Date: Wed, 7 Aug 2024 14:16:22 +0100
Subject: [PATCH 2/6] add module info metrics

---
 main.go               | 16 ++++++++++++++++
 metrics/prometheus.go | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/main.go b/main.go
index cd703aaf..0686a271 100644
--- a/main.go
+++ b/main.go
@@ -719,6 +719,22 @@ func run(c *cli.Context) {
 		logger.Info("OIDC authentication configured", "issuer", c.String("oidc-issuer"), "clientID", c.String("oidc-client-id"))
 	}
 
+	go func() {
+		ticker := time.NewTicker(time.Minute)
+		defer ticker.Stop()
+
+		for {
+			select {
+			case <-ticker.C:
+				if err := metrics.CollectModuleInfo(ctx, mgr.GetClient()); err != nil {
+					logger.Error("unable to collect module info metrics", "error", err)
+				}
+			case <-ctx.Done():
+				return
+			}
+		}
+	}()
+
 	webserver := &webserver.WebServer{
 		Authenticator: oidcAuthenticator,
 		ListenAddress: c.String("webserver-bind-address"),
diff --git a/metrics/prometheus.go b/metrics/prometheus.go
index cf668272..ebc525a3 100644
--- a/metrics/prometheus.go
+++ b/metrics/prometheus.go
@@ -1,10 +1,13 @@
 package metrics
 
 import (
+	"context"
 	"strconv"
 	"time"
 
 	"github.com/prometheus/client_golang/prometheus"
+	tfaplv1beta1 "github.com/utilitywarehouse/terraform-applier/api/v1beta1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/metrics"
 )
 
@@ -33,10 +36,27 @@ type Prometheus struct {
 	moduleRunPending   *prometheus.GaugeVec
 	moduleRunSuccess   *prometheus.GaugeVec
 	moduleRunTimestamp *prometheus.GaugeVec
+	moduleInfo         *prometheus.GaugeVec
 }
 
 // Init creates and registers the custom metrics for terraform-applier.
 func (p *Prometheus) Init() {
+	p.moduleInfo = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: metricsNamespace,
+		Name:      "module_info",
+		Help:      "Current information about module including status",
+	},
+		[]string{
+			"module",
+			// Namespace name of the module
+			"namespace",
+			// state of the module
+			"state",
+			// potential reason associated with current state
+			"reason",
+		},
+	)
+
 	p.moduleRunCount = prometheus.NewCounterVec(prometheus.CounterOpts{
 		Namespace: metricsNamespace,
 		Name:      "module_run_count",
@@ -118,6 +138,7 @@ func (p *Prometheus) Init() {
 		p.moduleRunSuccess,
 		p.moduleRunPending,
 		p.moduleRunTimestamp,
+		p.moduleInfo,
 	)
 
 }
@@ -174,3 +195,25 @@ func (p *Prometheus) SetRunPending(module, namespace string, pending bool) {
 		"namespace": namespace,
 	}).Set(as)
 }
+
+// CollectModuleInfo resets the 'module_info' metric and then records the current state of every module.
+func (p *Prometheus) CollectModuleInfo(ctx context.Context, kc client.Client) error {
+
+	kubeModuleList := &tfaplv1beta1.ModuleList{}
+	if err := kc.List(ctx, kubeModuleList); err != nil {
+		return err
+	}
+
+	// reset all values and re-set current value
+	p.moduleInfo.Reset()
+
+	for _, m := range kubeModuleList.Items {
+		p.moduleInfo.With(prometheus.Labels{
+			"module":    m.Name,
+			"namespace": m.Namespace,
+			"state":     m.Status.CurrentState,
+			"reason":    m.Status.StateReason,
+		}).Set(1)
+	}
+	return nil
+}

From 8b25f9e16f86360d9d37e2390e1d18371d576ba7 Mon Sep 17 00:00:00 2001
From: Ashok Siyani <ashok.siyani@gmail.com>
Date: Wed, 7 Aug 2024 14:16:41 +0100
Subject: [PATCH 3/6] PR runs don't have annotations

---
 runner/runner.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/runner/runner.go b/runner/runner.go
index 4521ddc8..b4eedc29 100644
--- a/runner/runner.go
+++ b/runner/runner.go
@@ -132,7 +132,8 @@ func (r *Runner) process(run *tfaplv1beta1.Run, cancelChan <-chan struct{}, envs
 	defer func() {
 		// there are no annotations for schedule and polling runs
 		if run.Request.Type == tfaplv1beta1.ScheduledRun ||
-			run.Request.Type == tfaplv1beta1.PollingRun {
+			run.Request.Type == tfaplv1beta1.PollingRun ||
+			run.Request.Type == tfaplv1beta1.PRPlan {
 			return
 		}
 		if err := sysutil.RemoveRequest(context.Background(), r.ClusterClt, run.Module, run.Request); err != nil {

From 36cf11454b28a12d7532be1724c7ed8f7f0944e3 Mon Sep 17 00:00:00 2001
From: Ashok Siyani <ashok.siyani@gmail.com>
Date: Wed, 7 Aug 2024 15:00:35 +0100
Subject: [PATCH 4/6] do initial collection at start

---
 main.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/main.go b/main.go
index 0686a271..61f5ff98 100644
--- a/main.go
+++ b/main.go
@@ -720,9 +720,12 @@ func run(c *cli.Context) {
 	}
 
 	go func() {
+		if err := metrics.CollectModuleInfo(ctx, mgr.GetClient()); err != nil {
+			logger.Error("unable to collect module info metrics", "error", err)
+		}
+
 		ticker := time.NewTicker(time.Minute)
 		defer ticker.Stop()
-
 		for {
 			select {
 			case <-ticker.C:

From fef7608cd99d485a6baa4f56bd85ed0d25948d43 Mon Sep 17 00:00:00 2001
From: Ashok Siyani <ashok.siyani@gmail.com>
Date: Wed, 7 Aug 2024 15:11:34 +0100
Subject: [PATCH 5/6] updated README

---
 README.md | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 0428b64d..e9e206d3 100644
--- a/README.md
+++ b/README.md
@@ -301,16 +301,14 @@ terraform-applier exports Prometheus metrics. The metrics are available on given
 
 In addition to the [controller-runtime](https://book.kubebuilder.io/reference/metrics-reference.html) default metrics, the following custom metrics are included:
 
-- `terraform_applier_module_run_count` - (tags: `module`,`namespace`, `success`) A Counter for each module that has had a terraform run attempt over the lifetime of
+- `terraform_applier_module_info` - (tags: `module`,`namespace`, `state`, `reason`) A Gauge that captures the current information about a module, including its status
+- `terraform_applier_module_run_count` - (tags: `module`,`namespace`, `run_type`, `success`) A Counter for each module that has had a terraform run attempt over the lifetime of
   the application, incremented with each apply attempt and tagged with the result of the run (`success=true|false`)
-- `terraform_applier_module_run_duration_seconds` - (tags: `module`,`namespace`, `success`) A Summary that keeps track of the durations of each terraform run for
+- `terraform_applier_module_run_duration_seconds` - (tags: `module`,`namespace`, `run_type`, `success`) A Histogram that keeps track of the durations of each terraform run for
   each module, tagged with the result of the run (`success=true|false`)
-- `terraform_applier_module_last_run_success` - (tags: `module`,`namespace`) A `Gauge` which
+- `terraform_applier_module_last_run_success` - (tags: `module`,`namespace`, `run_type`) A `Gauge` which
   tracks whether the last terraform run for a module was successful.
-- `terraform_applier_module_last_run_timestamp` - (tags: `module`,`namespace`) A Gauge that captures the Timestamp of the last successful module run.
-- `terraform_applier_module_terraform_exit_code_count` - (tags: `module`,`namespace`, `command`, `exit_code`) A `Counter` for each exit code returned by executions of
-  `terraform`, labelled with the command issued (`init`, `plan`,`apply`) and the exit code. It's worth noting that `plan` will
-  return a code of `2` if there are changes to be made, which is not an error or a failure, so you may wish to account for this in your alerting.
+- `terraform_applier_module_last_run_timestamp` - (tags: `module`,`namespace`,`run_type`) A Gauge that captures the Timestamp of the last successful module run.
 - `terraform_applier_git_last_mirror_timestamp` - (tags: `repo`) A Gauge that captures the Timestamp of the last successful git sync per repo.
 - `terraform_applier_git_mirror_count` - (tags: `repo`,`success`) A Counter for each repo sync, incremented with each sync attempt and tagged with the result (`success=true|false`)
 - `terraform_applier_git_mirror_latency_seconds` - (tags: `repo`) A Summary that keeps track of the git sync latency per repo.

From 891b2f52984b8a59366abad39819c8c69d0745ec Mon Sep 17 00:00:00 2001
From: Ashok Siyani <ashok.siyani@gmail.com>
Date: Wed, 7 Aug 2024 15:14:37 +0100
Subject: [PATCH 6/6] get client once

---
 main.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/main.go b/main.go
index 61f5ff98..38f614e1 100644
--- a/main.go
+++ b/main.go
@@ -719,8 +719,8 @@ func run(c *cli.Context) {
 		logger.Info("OIDC authentication configured", "issuer", c.String("oidc-issuer"), "clientID", c.String("oidc-client-id"))
 	}
 
-	go func() {
-		if err := metrics.CollectModuleInfo(ctx, mgr.GetClient()); err != nil {
+	go func(client client.Client) {
+		if err := metrics.CollectModuleInfo(ctx, client); err != nil {
 			logger.Error("unable to collect module info metrics", "error", err)
 		}
 
@@ -729,14 +729,14 @@ func run(c *cli.Context) {
 		for {
 			select {
 			case <-ticker.C:
-				if err := metrics.CollectModuleInfo(ctx, mgr.GetClient()); err != nil {
+				if err := metrics.CollectModuleInfo(ctx, client); err != nil {
 					logger.Error("unable to collect module info metrics", "error", err)
 				}
 			case <-ctx.Done():
 				return
 			}
 		}
-	}()
+	}(mgr.GetClient())
 
 	webserver := &webserver.WebServer{
 		Authenticator: oidcAuthenticator,