Browse Source

Add health apis

metrics
IamTheFij 1 month ago
parent
commit
bdbd9fb722
  1. 15
      job.go
  2. 5
      main.go
  3. 29
      restic.go
  4. 76
      scheduler.go
  5. 4
      tasks.go
  6. 39
      test/test.hcl

15
job.go

@ -225,12 +225,25 @@ func (j Job) Healthy() (bool, error) {
}
func (j Job) Run() {
result := JobResult{
JobName: j.Name,
JobType: "backup",
Success: true,
LastError: nil,
Message: "",
}
if err := j.RunBackup(); err != nil {
j.healthy = false
j.lastErr = err
j.Logger().Printf("ERROR: Backup failed: %v", err)
j.Logger().Printf("ERROR: Backup failed: %s", err.Error())
result.Success = false
result.LastError = err
}
JobComplete(result)
}
func (j Job) NewRestic() *Restic {

5
main.go

@ -136,6 +136,7 @@ func main() {
backup := flag.String("backup", "", "Run backup jobs now. Names are comma separated and `all` will run all.")
restore := flag.String("restore", "", "Run restore jobs now. Names are comma separated and `all` will run all.")
once := flag.Bool("once", false, "Run jobs specified using -backup and -restore once and exit")
healthCheckAddr := flag.String("addr", "0.0.0.0:8080", "address to bind health check API")
flag.StringVar(&JobBaseDir, "base-dir", JobBaseDir, "Base dir to create intermediate job files like SQL dumps.")
flag.Parse()
@ -174,6 +175,10 @@ func main() {
return
}
go func() {
_ = RunHTTPHandlers(*healthCheckAddr)
}()
// TODO: Add healthcheck handler using Job.Healthy()
if err := ScheduleAndRunJobs(jobs); err != nil {
log.Fatalf("failed running jobs: %v", err)

29
restic.go

@ -241,6 +241,33 @@ func (rcmd Restic) BuildEnv() []string {
return envList
}
type ResticError struct {
OriginalError error
Command string
Output []string
}
func NewResticError(command string, output []string, originalError error) *ResticError {
return &ResticError{
OriginalError: originalError,
Command: command,
Output: output,
}
}
func (e *ResticError) Error() string {
return fmt.Sprintf(
"error running restic %s: %s\nOutput:\n%s",
e.Command,
e.OriginalError,
strings.Join(e.Output, "\n"),
)
}
func (e *ResticError) Unwrap() error {
return e.OriginalError
}
func (rcmd Restic) RunRestic(command string, options CommandOptions, commandArgs ...string) ([]string, error) {
args := []string{}
if rcmd.GlobalOpts != nil {
@ -265,7 +292,7 @@ func (rcmd Restic) RunRestic(command string, options CommandOptions, commandArgs
responseErr = ErrRepoNotFound
}
return output.Lines, fmt.Errorf("error running restic %s: %w", command, responseErr)
return output.Lines, NewResticError(command, output.Lines, responseErr)
}
return output.Lines, nil

76
scheduler.go

@ -1,40 +1,102 @@
package main
import (
"encoding/json"
"fmt"
"net/http"
"os"
"os/signal"
"sync"
"syscall"
"github.com/robfig/cron/v3"
)
var jobResultsLock = sync.Mutex{}
var jobResults = map[string]JobResult{}
type JobResult struct {
JobName string
JobType string
Success bool
LastError error
Message string
}
func (r JobResult) Format() string {
return fmt.Sprintf("%s %s ok? %v\n\n%+v", r.JobName, r.JobType, r.Success, r.LastError)
}
func JobComplete(result JobResult) {
fmt.Printf("Completed job %+v\n", result)
jobResultsLock.Lock()
jobResults[result.JobName] = result
jobResultsLock.Unlock()
}
func writeJobResult(writer http.ResponseWriter, jobName string) {
if jobResult, ok := jobResults[jobName]; ok {
if !jobResult.Success {
writer.WriteHeader(http.StatusServiceUnavailable)
}
jobResult.Message = jobResult.LastError.Error()
if err := json.NewEncoder(writer).Encode(jobResult); err != nil {
_, _ = writer.Write([]byte(fmt.Sprintf("failed writing json for %s", jobResult.Format())))
}
writer.Header().Set("Content-Type", "application/json")
} else {
writer.WriteHeader(http.StatusNotFound)
_, _ = writer.Write([]byte("{\"Message\": \"Unknown job\"}"))
}
}
func healthHandleFunc(writer http.ResponseWriter, request *http.Request) {
query := request.URL.Query()
if jobName, ok := query["job"]; ok {
writeJobResult(writer, jobName[0])
return
}
_, _ = writer.Write([]byte("ok"))
}
func RunHTTPHandlers(addr string) error {
http.HandleFunc("/health", healthHandleFunc)
return fmt.Errorf("error on healthcheck: %w", http.ListenAndServe(addr, nil))
}
func ScheduleAndRunJobs(jobs []Job) error {
signalChan := make(chan os.Signal, 1)
signal.Notify(signalChan,
signal.Notify(
signalChan,
syscall.SIGINT,
syscall.SIGTERM,
syscall.SIGQUIT,
)
runner := cron.New()
scheduler := cron.New()
for _, job := range jobs {
fmt.Println("Scheduling", job.Name)
if _, err := runner.AddJob(job.Schedule, job); err != nil {
return fmt.Errorf("Error scheduling job %s: %w", job.Name, err)
if _, err := scheduler.AddJob(job.Schedule, job); err != nil {
return fmt.Errorf("error scheduling job %s: %w", job.Name, err)
}
}
runner.Start()
scheduler.Start()
switch <-signalChan {
case syscall.SIGINT:
fmt.Println("Stopping now...")
defer runner.Stop()
defer scheduler.Stop()
return nil
case syscall.SIGTERM:
@ -44,7 +106,7 @@ func ScheduleAndRunJobs(jobs []Job) error {
fmt.Println("Stopping after running jobs complete...")
defer func() {
ctx := runner.Stop()
ctx := scheduler.Stop()
<-ctx.Done()
}()

4
tasks.go

@ -223,7 +223,7 @@ func (t BackupFilesTask) RunBackup(cfg TaskConfig) error {
if err := cfg.Restic.Backup(cfg.BackupPaths, *t.BackupOpts); err != nil {
err = fmt.Errorf("failed backing up paths: %w", err)
cfg.Logger.Fatal(err)
cfg.Logger.Print(err)
return err
}
@ -239,7 +239,7 @@ func (t BackupFilesTask) RunRestore(cfg TaskConfig) error {
// TODO: Make the snapshot configurable
if err := cfg.Restic.Restore("latest", *t.RestoreOpts); err != nil {
err = fmt.Errorf("failed restoring paths: %w", err)
cfg.Logger.Fatal(err)
cfg.Logger.Print(err)
return err
}

39
test/test.hcl

@ -1,40 +1,37 @@
job "TestBackup" {
schedule = "1 * * * *"
schedule = "* * * * *"
config {
repo = "./backups"
repo = "test/data/backups"
passphrase = "supersecret"
options {
CacheDir = "./cache"
CacheDir = "test/data/cache"
}
}
task "before script" {
script {
on_backup = "echo before backup!"
task "create test data" {
pre_script {
on_backup = "echo test > test/data/data/test.txt"
}
}
task "backup" {
backup {
files = [
"./data"
]
backup_opts {
Tags = ["foo"]
}
task "backup phases" {
pre_script {
on_backup = "echo 'pre-backup'"
on_restore = "echo 'pre-restore'"
}
restore_opts {
Target = "."
}
post_script {
on_backup = "echo 'post-backup'"
on_restore = "echo 'post-restore'"
}
}
task "after script" {
script {
on_backup = "echo after backup!"
backup {
paths = ["./test/data/data"]
restore_opts {
Target = "."
}
}

Loading…
Cancel
Save