package main
|
|
|
|
import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"os/signal"
	"strconv"
	"strings"
	"syscall"
	"time"

	"github.com/rs/zerolog"
	"github.com/rs/zerolog/log"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
)
|
|
|
|
const (
	// ExitCodeIdleShutdown is the sentinel exit code the main container
	// uses to signal a clean idle shutdown; observing it triggers
	// deletion of the deployment instead of a restart.
	ExitCodeIdleShutdown = 42

	// PollInterval is how often the sidecar re-checks whether the main
	// container process is still running.
	PollInterval = 5 * time.Second
)
|
|
|
|
func main() {
|
|
// Setup logging
|
|
zerolog.TimeFieldFormat = zerolog.TimeFormatUnix
|
|
log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stderr})
|
|
|
|
log.Info().Msg("Lifecycle sidecar starting")
|
|
|
|
// Get environment configuration
|
|
namespace := os.Getenv("NAMESPACE")
|
|
deploymentName := os.Getenv("DEPLOYMENT_NAME")
|
|
userType := os.Getenv("USER_TYPE")
|
|
mainContainerPID := os.Getenv("MAIN_CONTAINER_PID")
|
|
|
|
if namespace == "" || deploymentName == "" {
|
|
log.Fatal().Msg("NAMESPACE and DEPLOYMENT_NAME environment variables are required")
|
|
}
|
|
|
|
log.Info().
|
|
Str("namespace", namespace).
|
|
Str("deployment", deploymentName).
|
|
Str("userType", userType).
|
|
Str("mainPID", mainContainerPID).
|
|
Msg("Configuration loaded")
|
|
|
|
// Create Kubernetes client
|
|
config, err := rest.InClusterConfig()
|
|
if err != nil {
|
|
log.Fatal().Err(err).Msg("Failed to get in-cluster config")
|
|
}
|
|
|
|
clientset, err := kubernetes.NewForConfig(config)
|
|
if err != nil {
|
|
log.Fatal().Err(err).Msg("Failed to create Kubernetes client")
|
|
}
|
|
|
|
// Wait for main container to exit
|
|
exitCode := waitForMainContainer()
|
|
|
|
log.Info().Int("exitCode", exitCode).Msg("Main container exited")
|
|
|
|
// Handle exit code
|
|
if exitCode == ExitCodeIdleShutdown {
|
|
log.Info().Msg("Detected idle shutdown (exit code 42) - cleaning up deployment")
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
|
defer cancel()
|
|
|
|
// Delete PVC if anonymous user
|
|
deletePVC := userType == "anonymous" || userType == "temporary"
|
|
|
|
if err := cleanupDeployment(ctx, clientset, namespace, deploymentName, deletePVC); err != nil {
|
|
log.Error().Err(err).Msg("Failed to cleanup deployment")
|
|
os.Exit(1)
|
|
}
|
|
|
|
log.Info().Msg("Cleanup complete - sidecar exiting")
|
|
os.Exit(0)
|
|
} else {
|
|
// Any other exit code - let Kubernetes restart policy handle it
|
|
log.Info().
|
|
Int("exitCode", exitCode).
|
|
Msg("Non-idle exit code - allowing Kubernetes to handle restart")
|
|
os.Exit(exitCode)
|
|
}
|
|
}
|
|
|
|
// waitForMainContainer monitors the main container process and returns its exit code
|
|
func waitForMainContainer() int {
|
|
// Try multiple methods to detect main container exit
|
|
// Method 1: Poll for process via shared PID namespace
|
|
mainPID := os.Getenv("MAIN_CONTAINER_PID")
|
|
if mainPID != "" {
|
|
return pollProcessExit(mainPID)
|
|
}
|
|
|
|
// Method 2: Poll for agent process by name (fallback)
|
|
log.Info().Msg("MAIN_CONTAINER_PID not set, polling for 'agent' process")
|
|
return pollProcessByName("agent")
|
|
}
|
|
|
|
// pollProcessExit polls for process exit by PID
|
|
func pollProcessExit(pidStr string) int {
|
|
log.Info().Str("pid", pidStr).Msg("Monitoring main container process")
|
|
|
|
for {
|
|
// Check if process exists
|
|
cmd := exec.Command("kill", "-0", pidStr)
|
|
err := cmd.Run()
|
|
|
|
if err != nil {
|
|
// Process no longer exists - get exit code from /proc if available
|
|
log.Info().Msg("Main container process exited")
|
|
|
|
// Try to get actual exit code (this is a best-effort)
|
|
// In Kubernetes, we might not have access to the actual exit code
|
|
// So we check if the container restarted via container status
|
|
return getContainerExitCode()
|
|
}
|
|
|
|
time.Sleep(PollInterval)
|
|
}
|
|
}
|
|
|
|
// pollProcessByName polls for process exit by name
|
|
func pollProcessByName(name string) int {
|
|
log.Info().Str("name", name).Msg("Monitoring main container by name")
|
|
|
|
for {
|
|
cmd := exec.Command("pgrep", "-x", name)
|
|
err := cmd.Run()
|
|
|
|
if err != nil {
|
|
log.Info().Msg("Main container process exited")
|
|
return getContainerExitCode()
|
|
}
|
|
|
|
time.Sleep(PollInterval)
|
|
}
|
|
}
|
|
|
|
// getContainerExitCode attempts to retrieve the exit code of the main container
|
|
// This is challenging in Kubernetes without direct access to container runtime
|
|
// We use a fallback approach: check a shared file or default to 0
|
|
func getContainerExitCode() int {
|
|
// Check if main container wrote exit code to shared volume
|
|
exitCodeFile := "/var/run/agent/exit_code"
|
|
data, err := os.ReadFile(exitCodeFile)
|
|
if err == nil {
|
|
var exitCode int
|
|
_, err := fmt.Sscanf(string(data), "%d", &exitCode)
|
|
if err == nil {
|
|
log.Info().Int("exitCode", exitCode).Msg("Read exit code from shared file")
|
|
return exitCode
|
|
}
|
|
}
|
|
|
|
// Default to 0 if we can't determine exit code
|
|
// This is safe because non-42 codes allow restart
|
|
log.Warn().Msg("Could not determine exit code, defaulting to 0")
|
|
return 0
|
|
}
|
|
|
|
// cleanupDeployment deletes the deployment and optionally the PVC
|
|
func cleanupDeployment(ctx context.Context, clientset *kubernetes.Clientset, namespace, deploymentName string, deletePVC bool) error {
|
|
log.Info().
|
|
Str("namespace", namespace).
|
|
Str("deployment", deploymentName).
|
|
Bool("deletePVC", deletePVC).
|
|
Msg("Cleaning up deployment")
|
|
|
|
// Get deployment to find PVC name if needed
|
|
var pvcName string
|
|
if deletePVC {
|
|
deployment, err := clientset.AppsV1().Deployments(namespace).Get(ctx, deploymentName, metav1.GetOptions{})
|
|
if err != nil {
|
|
log.Warn().Err(err).Msg("Could not get deployment for PVC lookup")
|
|
} else {
|
|
// Find PVC from volume claim templates or volumes
|
|
if len(deployment.Spec.Template.Spec.Volumes) > 0 {
|
|
for _, vol := range deployment.Spec.Template.Spec.Volumes {
|
|
if vol.PersistentVolumeClaim != nil {
|
|
pvcName = vol.PersistentVolumeClaim.ClaimName
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Delete deployment
|
|
deletePolicy := metav1.DeletePropagationForeground
|
|
deleteOptions := metav1.DeleteOptions{
|
|
PropagationPolicy: &deletePolicy,
|
|
}
|
|
|
|
log.Info().Str("deployment", deploymentName).Msg("Deleting deployment")
|
|
err := clientset.AppsV1().Deployments(namespace).Delete(ctx, deploymentName, deleteOptions)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to delete deployment: %w", err)
|
|
}
|
|
|
|
log.Info().Msg("Deployment deleted successfully")
|
|
|
|
// Delete PVC if requested and found
|
|
if deletePVC && pvcName != "" {
|
|
log.Info().Str("pvc", pvcName).Msg("Deleting PVC")
|
|
err := clientset.CoreV1().PersistentVolumeClaims(namespace).Delete(ctx, pvcName, metav1.DeleteOptions{})
|
|
if err != nil {
|
|
log.Warn().Err(err).Str("pvc", pvcName).Msg("Failed to delete PVC (non-fatal)")
|
|
} else {
|
|
log.Info().Msg("PVC deleted successfully")
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func init() {
|
|
// Register signal handler for graceful shutdown
|
|
// If sidecar receives SIGTERM, just exit cleanly
|
|
// Don't trigger deployment deletion on sidecar termination
|
|
go func() {
|
|
sigChan := make(chan os.Signal, 1)
|
|
signal.Notify(sigChan, syscall.SIGTERM)
|
|
<-sigChan
|
|
log.Info().Msg("Received SIGTERM - sidecar exiting without cleanup")
|
|
os.Exit(0)
|
|
}()
|
|
}
|