From 36fc79dfc68a7a49c0718af50cbdc017c0417966 Mon Sep 17 00:00:00 2001 From: Simone Gotti Date: Thu, 21 Feb 2019 15:54:50 +0100 Subject: [PATCH] runservice: initial commit --- go.mod | 35 +- go.sum | 101 +- internal/runconfig/runconfig.go | 351 +++++ internal/runconfig/runconfig_test.go | 684 +++++++++ internal/services/runservice/executor/api.go | 217 +++ .../runservice/executor/driver/docker.go | 468 +++++++ .../runservice/executor/driver/docker_test.go | 242 ++++ .../runservice/executor/driver/driver.go | 90 ++ .../services/runservice/executor/executor.go | 931 +++++++++++++ .../services/runservice/scheduler/api/api.go | 569 ++++++++ .../runservice/scheduler/api/client.go | 252 ++++ .../runservice/scheduler/api/executor.go | 247 ++++ .../runservice/scheduler/command/command.go | 277 ++++ .../runservice/scheduler/common/common.go | 114 ++ .../runservice/scheduler/common/events.go | 46 + .../runservice/scheduler/readdb/create.go | 41 + .../runservice/scheduler/readdb/readdb.go | 982 +++++++++++++ .../runservice/scheduler/scheduler.go | 1241 +++++++++++++++++ .../runservice/scheduler/store/store.go | 551 ++++++++ internal/services/runservice/types/types.go | 515 +++++++ internal/util/path.go | 33 + internal/util/sha.go | 37 + 22 files changed, 7984 insertions(+), 40 deletions(-) create mode 100644 internal/runconfig/runconfig.go create mode 100644 internal/runconfig/runconfig_test.go create mode 100644 internal/services/runservice/executor/api.go create mode 100644 internal/services/runservice/executor/driver/docker.go create mode 100644 internal/services/runservice/executor/driver/docker_test.go create mode 100644 internal/services/runservice/executor/driver/driver.go create mode 100644 internal/services/runservice/executor/executor.go create mode 100644 internal/services/runservice/scheduler/api/api.go create mode 100644 internal/services/runservice/scheduler/api/client.go create mode 100644 internal/services/runservice/scheduler/api/executor.go create mode 100644 internal/services/runservice/scheduler/command/command.go create mode 100644 internal/services/runservice/scheduler/common/common.go create mode 100644 internal/services/runservice/scheduler/common/events.go create mode 100644 internal/services/runservice/scheduler/readdb/create.go create mode 100644 internal/services/runservice/scheduler/readdb/readdb.go create mode 100644 internal/services/runservice/scheduler/scheduler.go create mode 100644 internal/services/runservice/scheduler/store/store.go create mode 100644 internal/services/runservice/types/types.go create mode 100644 internal/util/path.go create mode 100644 internal/util/sha.go diff --git a/go.mod b/go.mod index 09cd2d5..5f2b99e 100644 --- a/go.mod +++ b/go.mod @@ -1,28 +1,51 @@ module github.com/sorintlab/agola require ( - github.com/Masterminds/squirrel v1.1.0 + github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect + github.com/Masterminds/squirrel v0.0.0-20181204161840-e5bf00f96d4a + github.com/Microsoft/go-winio v0.4.11 // indirect + github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect github.com/bmatcuk/doublestar v1.1.1 + github.com/containerd/continuity v0.0.0-20181203112020-004b46473808 // indirect + github.com/docker/distribution v2.7.1+incompatible // indirect + github.com/docker/docker v1.13.1 + github.com/docker/go-connections v0.4.0 // indirect + github.com/docker/go-units v0.3.3 // indirect github.com/go-ini/ini v1.42.0 // indirect github.com/go-sql-driver/mysql v1.4.1 // indirect github.com/google/go-cmp v0.3.0 
github.com/gopherjs/gopherjs v0.0.0-20181103185306-d547d1d9531e // indirect + github.com/gorilla/context v1.1.1 // indirect + github.com/gorilla/handlers v1.4.0 + github.com/gorilla/mux v1.6.2 + github.com/hashicorp/go-sockaddr v1.0.1 + github.com/jtolds/gls v4.2.1+incompatible // indirect github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect + github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect + github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect github.com/lib/pq v1.0.0 // indirect github.com/mattn/go-sqlite3 v1.10.0 github.com/minio/minio-go v6.0.14+incompatible github.com/mitchellh/go-homedir v1.1.0 // indirect + github.com/opencontainers/go-digest v1.0.0-rc1 // indirect + github.com/opencontainers/image-spec v1.0.1 // indirect + github.com/opencontainers/runc v0.1.1 // indirect github.com/pkg/errors v0.8.0 github.com/sanity-io/litter v1.1.0 github.com/satori/go.uuid v1.2.0 github.com/sgotti/gexpect v0.0.0-20161123102107-0afc6c19f50a - github.com/smartystreets/goconvey v0.0.0-20190306220146-200a235640ff // indirect + github.com/smartystreets/assertions v0.0.0-20190116191733-b6c0e53d7304 // indirect + github.com/smartystreets/goconvey v0.0.0-20181108003508-044398e4856c // indirect github.com/spf13/cobra v0.0.3 go.etcd.io/etcd v0.0.0-20181128220305-dedae6eb7c25 go.uber.org/zap v1.9.1 - golang.org/x/crypto v0.0.0-20190228161510-8dd112bcdc25 // indirect - golang.org/x/net v0.0.0-20190301231341-16b79f2e4e95 // indirect - google.golang.org/grpc v1.19.0 // indirect - gopkg.in/ini.v1 v1.42.0 // indirect + golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9 // indirect + golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e // indirect + golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 // indirect + google.golang.org/appengine v1.4.0 // indirect + gopkg.in/ini.v1 v1.41.0 // indirect gopkg.in/yaml.v2 v2.2.2 + gotest.tools v2.2.0+incompatible // indirect ) + +replace github.com/docker/docker v1.13.1 => github.com/docker/engine v0.0.0-20181106193140-f5749085e9cb diff --git a/go.sum b/go.sum index c71118e..4429e94 100644 --- a/go.sum +++ b/go.sum @@ -1,13 +1,19 @@ -cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/Masterminds/squirrel v1.1.0 h1:baP1qLdoQCeTw3ifCdOq2dkYc6vGcmRdaociKLbEJXs= -github.com/Masterminds/squirrel v1.1.0/go.mod h1:yaPeOnPG5ZRwL9oKdTsO/prlkPbXWZlRVMQ/gGlzIuA= +github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 h1:w+iIsaOQNcT7OZ575w+acHgRric5iCyQh+xv+KJ4HB8= +github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8= +github.com/Masterminds/squirrel v0.0.0-20181204161840-e5bf00f96d4a h1:pMmt05odIWMlrx89uWavde2DDX8SXzaYnbGW+knFeU0= +github.com/Masterminds/squirrel v0.0.0-20181204161840-e5bf00f96d4a/go.mod h1:xnKTFzjGUiZtiOagBsfnvomW+nJg2usB1ZpordQWqNM= +github.com/Microsoft/go-winio v0.4.11 h1:zoIOcVf0xPN1tnMVbTtEdI+P8OofVk3NObnwOQ6nK2Q= +github.com/Microsoft/go-winio v0.4.11/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA= +github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw= +github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk= +github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/beorn7/perks 
v0.0.0-20180321164747-3a771d992973 h1:xJ4a3vCFaGF/jqvzLMYoU8P317H5OQ+Via4RmuPwCS0= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bmatcuk/doublestar v1.1.1 h1:YroD6BJCZBYx06yYFEWvUuKVWQn3vLLQAVmDmvTSaiQ= github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w= -github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/containerd/continuity v0.0.0-20181203112020-004b46473808 h1:4BX8f882bXEDKfWIf0wa8HRvpnBoPszJJXL+TVbBw4M= +github.com/containerd/continuity v0.0.0-20181203112020-004b46473808/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= github.com/coreos/go-semver v0.2.0 h1:3Jm3tLmsgAYcjC+4Up7hJrFBPr+n7rAqYeSw/SZazuY= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7 h1:u9SHYsPQNyt5tgDm3YN7+9dYrpK96E5wFilTFWIDZOM= @@ -18,6 +24,14 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= +github.com/docker/distribution v2.7.1+incompatible h1:a5mlkVzth6W5A4fOsS3D2EO5BUmsJpcB+cRlLU7cSug= +github.com/docker/distribution v2.7.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= +github.com/docker/engine v0.0.0-20181106193140-f5749085e9cb h1:PyjxRdW1mqCmSoxy/6uP01P7CGbsD+woX+oOWbaUPwQ= +github.com/docker/engine v0.0.0-20181106193140-f5749085e9cb/go.mod h1:3CPr2caMgTHxxIAZgEMd3uLYPDlRvPqCpyeRf6ncPcY= +github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= +github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= +github.com/docker/go-units v0.3.3 h1:Xk8S3Xj5sLGlG5g67hJmYMmUgXv5N4PhkjJHHqrwnTk= +github.com/docker/go-units v0.3.3/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4 h1:qk/FSDDxo05wdJH28W+p5yivv7LuLYLRXPPD8KQCtZs= github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= @@ -35,18 +49,24 @@ github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekf github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903 h1:LbsanbbD6LieFkXbj9YNNBupiGHJgFeLpO0j0Fza1h8= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/google/btree v0.0.0-20180124185431-e89373fe6b4a h1:ZJu5NB1Bk5ms4vw0Xu4i+jD32SE9jQXyfnOvwhHqlT0= +github.com/google/btree v0.0.0-20180124185431-e89373fe6b4a h1:ZJu5NB1Bk5ms4vw0Xu4i+jD32SE9jQXyfnOvwhHqlT0= +github.com/google/btree 
v0.0.0-20180124185431-e89373fe6b4a/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v0.0.0-20180124185431-e89373fe6b4a/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/uuid v1.0.0 h1:b4Gk+7WdP/d3HZH8EJsZpvV7EtDOgaZLtnaNGIu1adA= github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gopherjs/gopherjs v0.0.0-20181103185306-d547d1d9531e h1:JKmoR8x90Iww1ks85zJ1lfDGgIiMDuIptTOhJq+zKyg= github.com/gopherjs/gopherjs v0.0.0-20181103185306-d547d1d9531e/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= +github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8= +github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= +github.com/gorilla/handlers v1.4.0 h1:XulKRWSQK5uChr4pEgSE4Tc/OcmnU9GJuSwdog/tZsA= +github.com/gorilla/handlers v1.4.0/go.mod h1:Qkdc/uu4tH4g6mTK6auzZ766c4CA0Ng8+o/OAirnOIQ= +github.com/gorilla/mux v1.6.2 h1:Pgr17XVTNXAk3q/r4CpKzC5xBM/qW1uVLV+IhRZpIIk= +github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c h1:Lh2aW+HnU2Nbe1gqD9SOJLJxW1jBMmQOktN2acDyJk8= github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0 h1:Iju5GlWwrvL6UBg4zJJt3btmonfrMlCDdsejg4CZE7c= @@ -55,14 +75,18 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92Bcuy github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.4.1 h1:pX7cnDwSSmG0dR9yNjCQSSpmsJOqFdT7SzVp5Yl9uVw= github.com/grpc-ecosystem/grpc-gateway v1.4.1/go.mod h1:RSKVYQBd5MCa4OVpNdGskqpgL2+G+NZTnrVHpWWfpdw= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= +github.com/hashicorp/go-sockaddr v1.0.1 h1:eCkkJ5KOOktDvwbsE9KPyiBWaOfp1ZNy2gLHgL8PSBM= +github.com/hashicorp/go-sockaddr v1.0.1/go.mod h1:rB4wwRAUzs07qva3c5SdrY/NEtAUjGlgmH/UkBUC97A= github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/jonboulle/clockwork v0.1.0 h1:VKV+ZcuP6l3yW9doeqz6ziZGgcynBVQO+obU0+0hcPo= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= -github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= -github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= +github.com/jtolds/gls v4.2.1+incompatible h1:fSuqC+Gmlu6l/ZYAoZzx2pyucC8Xza35fpRVWLVmUEE= +github.com/jtolds/gls v4.2.1+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= 
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= github.com/kr/pty v1.0.0 h1:jR04h3bskdxb8xt+5B6MoxPwDhMCe0oEgxug4Ca1YSA= @@ -74,6 +98,7 @@ github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0/go.mod h1:vmVJ0l/dxyfGW6Fm github.com/lib/pq v1.0.0 h1:X5PMW56eZitiTeO7tKzZxFCSpbFZJtkMMooicw2us9A= github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= +github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-sqlite3 v1.10.0 h1:jbhqpg7tQe4SupckyijYiy0mJJ/pRyHvXf7JdWK860o= @@ -82,17 +107,28 @@ github.com/matttproud/golang_protobuf_extensions v1.0.0 h1:YNOwxxSJzSUARoD9KRZLz github.com/matttproud/golang_protobuf_extensions v1.0.0/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/minio/minio-go v6.0.14+incompatible h1:fnV+GD28LeqdN6vT2XdGKW8Qe/IfjJDswNVuni6km9o= github.com/minio/minio-go v6.0.14+incompatible/go.mod h1:7guKYtitv8dktvNUGrhzmNlA5wrAABTQXCoesZdFQO8= +github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= +github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-wordwrap v1.0.0/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo= github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/onsi/ginkgo v1.6.0 h1:Ix8l273rp3QzYgXSR+c8d1fTG7UPgYkOSELPhiY/YGw= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/gomega v1.4.2 h1:3mYCb7aPxS/RU7TI1y4rkEn1oKmPRjNJLNEXgw7MH2I= github.com/onsi/gomega v1.4.2/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= +github.com/opencontainers/go-digest v1.0.0-rc1 h1:WzifXhOVOEOuFYOJAW6aQqW0TooG2iki3E3Ii+WN7gQ= +github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= +github.com/opencontainers/image-spec v1.0.1 h1:JMemWkRwHx4Zj+fVxWoMCFm/8sYGGrUVojFA6h/TRcI= +github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= +github.com/opencontainers/runc v0.1.1 h1:GlxAyO6x8rfZYN9Tt0Kti5a/cP41iuiO2yYT0IJGY8Y= +github.com/opencontainers/runc v0.1.1/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= github.com/pkg/errors v0.8.0 h1:WdK/asTD0HN+q6hsWO3/vpuAkAr+tw6aNJNDFFf0+qw= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= github.com/prometheus/client_golang v0.8.0 h1:1921Yw9Gc3iSc4VQh3PIoOqgPCZS7G/4xQNVUp8Mda8= github.com/prometheus/client_golang v0.8.0/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_model 
v0.0.0-20170216185247-6f3806018612 h1:13pIdM2tpaDi4OVe24fgoIS7ZTqMt0QI+bwQsX5hq+g= @@ -101,6 +137,7 @@ github.com/prometheus/common v0.0.0-20180518154759-7600349dcfe1 h1:osmNoEW2SCW3L github.com/prometheus/common v0.0.0-20180518154759-7600349dcfe1/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= github.com/prometheus/procfs v0.0.0-20180612222113-7d6f385de8be h1:MoyXp/VjXUwM0GyDcdwT7Ubea2gxOSHpPaFo3qV+Y2A= github.com/prometheus/procfs v0.0.0-20180612222113-7d6f385de8be/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/sanity-io/litter v1.1.0 h1:BllcKWa3VbZmOZbDCoszYLk7zCsKHz5Beossi8SUcTc= github.com/sanity-io/litter v1.1.0/go.mod h1:CJ0VCw2q4qKU7LaQr3n7UOSHzgEMgcGco7N/SkZQPjw= github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= @@ -109,10 +146,10 @@ github.com/sgotti/gexpect v0.0.0-20161123102107-0afc6c19f50a h1:u7WP9TGHJIkJoi/d github.com/sgotti/gexpect v0.0.0-20161123102107-0afc6c19f50a/go.mod h1:HvB0+YQff1QGS1nct9E3/J8wo8s/EVjq+VXrJSDlQEY= github.com/sirupsen/logrus v1.0.5 h1:8c8b5uO0zS4X6RPl/sd1ENwSkIc0/H2PaHxE3udaE8I= github.com/sirupsen/logrus v1.0.5/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= -github.com/smartystreets/goconvey v0.0.0-20190306220146-200a235640ff h1:86HlEv0yBCry9syNuylzqznKXDK11p6D0DT596yNMys= -github.com/smartystreets/goconvey v0.0.0-20190306220146-200a235640ff/go.mod h1:KSQcGKpxUMHk3nbYzs/tIBAM2iDooCn0BmttHOJEbLs= +github.com/smartystreets/assertions v0.0.0-20190116191733-b6c0e53d7304 h1:Jpy1PXuP99tXNrhbq2BaPz9B+jNAvH1JPQQpG/9GCXY= +github.com/smartystreets/assertions v0.0.0-20190116191733-b6c0e53d7304/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= +github.com/smartystreets/goconvey v0.0.0-20181108003508-044398e4856c h1:Ho+uVpkel/udgjbwB5Lktg9BtvJSh2DT0Hi6LPSyI2w= +github.com/smartystreets/goconvey v0.0.0-20181108003508-044398e4856c/go.mod h1:XDJAKZRPZ1CvBcN2aX5YOUTYGHki24fSF0Iv48Ibg0s= github.com/soheilhy/cmux v0.1.4 h1:0HKaf1o97UwFjHH9o5XsHUOF+tqmdA7KEzXLpiyaw0E= github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= github.com/spf13/cobra v0.0.3 h1:ZlrZ4XsMRm04Fr5pSFxBgfND2EBVa1nLpiy1stUsX/8= @@ -139,33 +176,28 @@ go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/ go.uber.org/zap v1.9.1 h1:XCJQEf3W6eZaVwhRBof6ImoYGJSITeKWsyeh3HFu/5o= go.uber.org/zap v1.9.1/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= golang.org/x/crypto v0.0.0-20180608092829-8ac0e0d97ce4/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20190228161510-8dd112bcdc25 h1:jsG6UpNLt9iAsb0S2AGW28DveNzzgmbXR+ENoPjUeIU= -golang.org/x/crypto v0.0.0-20190228161510-8dd112bcdc25/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= -golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9 h1:mKdxBk7AujPs8kU4m80U72y/zjbZ3UcXC7dClwKbUI0= +golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/net 
v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190301231341-16b79f2e4e95 h1:fY7Dsw114eJN4boqzVSbpVHO6rTdhq6/GnXeu+PKnzU= -golang.org/x/net v0.0.0-20190301231341-16b79f2e4e95/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f h1:wMNYb4v58l5UBM7MYRLPG6ZhfOqbKu7X5eyFl8ZhKvA= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e h1:bRhVy7zSSasaqNksaRZiA5EEI+Ei4I1nO5Jh72wfHlg= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 h1:YUO/7uOKsKeq9UokNS62b8FYywz3ker1l1vDZRCRefw= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e h1:o3PsSEY8E4eXWkXrIP9YJALUkVZqzHJT5DOasTyn8Vs= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2 h1:+DCIGbF/swA92ohVg0//6X2IVY3KZs6p9mix0ziNYJM= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -google.golang.org/appengine v1.1.0 h1:igQkv0AAhEIvTEpD5LIpAfav2eeVO9HBTjvKHVJPRSs= -google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180608181217-32ee49c4dd80 h1:GL7nK1hkDKrkor0eVOYcMdIsUGErFnaC2gpBOVC+vbI= google.golang.org/genproto v0.0.0-20180608181217-32ee49c4dd80/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8 h1:Nw54tB0rB7hY/N0NQvRW8DG4Yk3Q6T9cu9RcFQDu1tc= -google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/grpc v1.14.0 h1:ArxJuB1NWfPY6r9Gp9gqwplT0Ge7nqv9msgu03lHLmo= google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= -google.golang.org/grpc v1.19.0 h1:cfg4PD8YEdSFnm7qLV4++93WcmhH2nIUhMjhdCvl3j8= -google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= gopkg.in/airbrake/gobrake.v2 v2.0.9 h1:7z2uVWwn7oVeeugY1DtlPAy5H+KYgB1KeKTnqjNatLo= gopkg.in/airbrake/gobrake.v2 v2.0.9/go.mod 
h1:/h5ZAUhDkGaJfjzjKLSjv6zCL6O0LLBxU4K+aSYdM/U= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= @@ -175,11 +207,12 @@ gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/gemnasium/logrus-airbrake-hook.v2 v2.1.2 h1:OAj3g0cR6Dx/R07QgQe8wkA9RNjB2u4i700xBkIT4e0= gopkg.in/gemnasium/logrus-airbrake-hook.v2 v2.1.2/go.mod h1:Xk6kEKp8OKb+X14hQBKWaSkCsqBpgog8nAV2xsGOxlo= -gopkg.in/ini.v1 v1.42.0 h1:7N3gPTt50s8GuLortA00n8AqRTk75qOP98+mTPpgzRk= -gopkg.in/ini.v1 v1.42.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/ini.v1 v1.41.0 h1:Ka3ViY6gNYSKiVy71zXBEqKplnV35ImDLVG+8uoIklE= +gopkg.in/ini.v1 v1.41.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= +gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= diff --git a/internal/runconfig/runconfig.go b/internal/runconfig/runconfig.go new file mode 100644 index 0000000..19223dd --- /dev/null +++ b/internal/runconfig/runconfig.go @@ -0,0 +1,351 @@ +// Copyright 2019 Sorint.lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied +// See the License for the specific language governing permissions and +// limitations under the License. 
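The file below introduces the three entry points of the runconfig package: GenRunConfig expands a pipeline into a types.RunConfig, CheckRunConfig validates its dependency graph, and GenTasksLevels assigns an execution level to every task. A hedged caller-side sketch of how they compose (this helper is not part of the commit; the pipeline name and environment map are invented):

package example // hypothetical, not part of this commit

import (
	"github.com/sorintlab/agola/internal/config"
	"github.com/sorintlab/agola/internal/runconfig"
	"github.com/sorintlab/agola/internal/services/runservice/types"
)

// buildRunConfig shows the intended call order of the helpers defined below.
func buildRunConfig(c *config.Config, pipeline string) (*types.RunConfig, error) {
	// expand the pipeline into a run config; assumes c was already validated
	rc := runconfig.GenRunConfig(c, pipeline, map[string]string{"CI": "true"})
	// reject graphs with circular or conflicting dependencies
	if err := runconfig.CheckRunConfig(rc); err != nil {
		return nil, err
	}
	// assign levels: 0 for root tasks, every parent on a lower level
	if err := runconfig.GenTasksLevels(rc); err != nil {
		return nil, err
	}
	return rc, nil
}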
+
+package runconfig
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/pkg/errors"
+	"github.com/sorintlab/agola/internal/config"
+	"github.com/sorintlab/agola/internal/services/runservice/types"
+	"github.com/sorintlab/agola/internal/util"
+
+	uuid "github.com/satori/go.uuid"
+)
+
+func genRuntime(c *config.Config, runtimeName string) *types.Runtime {
+	ce := c.Runtime(runtimeName)
+
+	containers := []*types.Container{}
+	for _, cc := range ce.Containers {
+		containers = append(containers, &types.Container{
+			Image: cc.Image,
+			Environment: cc.Environment,
+			User: cc.User,
+		})
+	}
+	return &types.Runtime{
+		Type: types.RuntimeType(ce.Type),
+		Containers: containers,
+	}
+}
+
+func stepFromConfigStep(csi interface{}) interface{} {
+	switch cs := csi.(type) {
+	case *config.CloneStep:
+		// transform a "clone" step into a "run" step command
+		rs := &config.RunStep{}
+
+		rs.Type = "run"
+		rs.Name = "Clone repository and checkout code"
+		rs.Command = `
+set -x
+
+mkdir ~/.ssh
+chmod 700 ~/.ssh
+touch ~/.ssh/id_rsa
+chmod 600 ~/.ssh/id_rsa
+
+# Add repository deploy key
+(cat <<EOF > ~/.ssh/id_rsa
+$AGOLA_SSHPRIVKEY
+EOF
+)
+
+if [ -n "$AGOLA_SKIPSSHHOSTKEYCHECK" ]; then
+	# Disable git host key verification
+	(cat <<EOF > ~/.ssh/config
+Host $AGOLA_GIT_HOST
+	HostName $AGOLA_GIT_HOST
+	StrictHostKeyChecking no
+	UserKnownHostsFile /dev/null
+EOF
+	)
+fi
+
+git clone $AGOLA_REPOSITORY_URL .
+git fetch origin $AGOLA_GIT_REF
+
+if [ -n "$AGOLA_GIT_COMMITSHA" ]; then
+	git checkout $AGOLA_GIT_COMMITSHA
+else
+	git checkout FETCH_HEAD
+fi
+`
+
+		return rs
+
+	case *config.RunStep:
+		rs := &types.RunStep{}
+
+		rs.Type = cs.Type
+		rs.Name = cs.Name
+		rs.Command = cs.Command
+		rs.Environment = cs.Environment
+		rs.WorkingDir = cs.WorkingDir
+		rs.Shell = cs.Shell
+		rs.User = cs.User
+		return rs
+
+	case *config.SaveToWorkspaceStep:
+		sws := &types.SaveToWorkspaceStep{}
+
+		sws.Type = cs.Type
+		sws.Name = cs.Name
+
+		sws.Contents = make([]types.SaveToWorkspaceContent, len(cs.Contents))
+		for i, csc := range cs.Contents {
+			sc := types.SaveToWorkspaceContent{}
+			sc.SourceDir = csc.SourceDir
+			sc.DestDir = csc.DestDir
+			sc.Paths = csc.Paths
+
+			sws.Contents[i] = sc
+		}
+		return sws
+
+	case *config.RestoreWorkspaceStep:
+		rws := &types.RestoreWorkspaceStep{}
+		rws.Name = cs.Name
+		rws.Type = cs.Type
+		rws.DestDir = cs.DestDir
+
+		return rws
+
+	default:
+		panic(fmt.Errorf("unknown config step type: %s", util.Dump(cs)))
+	}
+}
+
+// GenRunConfig generates a run config from a pipeline in the config, expanding all the references to tasks.
+// This function assumes that the config has already been checked for possible errors (i.e. referenced tasks must exist).
+func GenRunConfig(c *config.Config, pipelineName string, env map[string]string) *types.RunConfig {
+	cp := c.Pipeline(pipelineName)
+
+	rc := &types.RunConfig{
+		Name: cp.Name,
+		Tasks: make(map[string]*types.RunConfigTask),
+		Environment: env,
+	}
+
+	for _, cpe := range cp.Elements {
+		// resolve referenced task
+		cpt := c.Task(cpe.Task)
+
+		//environment := map[string]string{}
+		//if ct.Environment != nil {
+		//	environment = ct.Environment
+		//}
+		//mergeEnv(environment, rd.DynamicEnvironment)
+		//// StaticEnvironment variables override every other environment variable
+		//mergeEnv(environment, rd.Environment)
+		steps := make([]interface{}, len(cpt.Steps))
+		for i, cpts := range cpt.Steps {
+			steps[i] = stepFromConfigStep(cpts)
+		}
+
+		t := &types.RunConfigTask{
+			ID: uuid.NewV4().String(),
+			// use the element name from the config as the task name
+			Name: cpe.Name,
			Runtime: genRuntime(c, cpt.Runtime),
+			Environment: cpt.Environment,
+			WorkingDir: cpt.WorkingDir,
+			Shell: cpt.Shell,
+			User: cpt.User,
+			Steps: steps,
+			IgnoreFailure: cpe.IgnoreFailure,
+		}
+
+		rc.Tasks[t.ID] = t
+	}
+
+	// populate depends, needs to be done after having created all the tasks so we can resolve their IDs
+	for _, rct := range rc.Tasks {
+		cpe := cp.Elements[rct.Name]
+
+		depends := make([]*types.RunConfigTaskDepend, len(cpe.Depends))
+		for id, d := range cpe.Depends {
+			conditions := make([]types.RunConfigTaskDependCondition, len(d.Conditions))
+			// when no conditions are defined default to on_success
+			if len(d.Conditions) == 0 {
+				conditions = append(conditions, types.RunConfigTaskDependConditionOnSuccess)
+			} else {
+				for ic, c := range d.Conditions {
+					var condition types.RunConfigTaskDependCondition
+					switch c {
+					case config.DependConditionOnSuccess:
+						condition = types.RunConfigTaskDependConditionOnSuccess
+					case config.DependConditionOnFailure:
+						condition = types.RunConfigTaskDependConditionOnFailure
+					}
+					conditions[ic] = condition
+				}
+			}
+
+			drct := getRunConfigTaskByName(rc, d.ElementName)
+			depends[id] = &types.RunConfigTaskDepend{
+				TaskID: drct.ID,
+				Conditions: conditions,
+			}
+		}
+
+		rct.Depends = depends
+	}
+
+	return rc
+}
+
+func getRunConfigTaskByName(rc *types.RunConfig, name string) *types.RunConfigTask {
+	for _, rct := range rc.Tasks {
+		if rct.Name == name {
+			return rct
+		}
+	}
+	return nil
+}
+
+func CheckRunConfig(rc *types.RunConfig) error {
+	// check circular dependencies
+	cerrs := &util.Errors{}
+	for _, t := range rc.Tasks {
+		allParents := GetAllParents(rc, t)
+		for _, parent := range allParents {
+			if parent.ID == t.ID {
+				// TODO(sgotti) get the parent that depends on task to report it
+				dep := []string{}
+				for _, parent := range allParents {
+					pparents := GetParents(rc, parent)
+					for _, pparent := range pparents {
+						if pparent.ID == t.ID {
+							dep = append(dep, fmt.Sprintf("%q", parent.Name))
+						}
+					}
+				}
+				cerrs.Append(errors.Errorf("circular dependency between task %q and tasks %s", t.Name, strings.Join(dep, " ")))
+			}
+		}
+	}
+	if cerrs.IsErr() {
+		return cerrs
+	}
+
+	// check that the task and its parent don't have a common dependency
+	for _, t := range rc.Tasks {
+		parents := GetParents(rc, t)
+		for _, parent := range parents {
+			allParents := GetAllParents(rc, t)
+			allParentParents := GetAllParents(rc, parent)
+			for _, p := range allParents {
+				for _, pp := range allParentParents {
+					if p.ID == pp.ID {
+						return errors.Errorf("task %s and its parent %s both have a dependency on task %s", t.Name, parent.Name, p.Name)
+					}
+				}
+			}
+		}
+	}
+
+	return nil
+}
+
+func GenTasksLevels(rc *types.RunConfig) error {
+	// reset all task levels
+	for _, t := range rc.Tasks {
+		t.Level = -1
+	}
+
+	level := 0
+	for {
+		c := 0
+		for _, t := range rc.Tasks {
+			// skip tasks with the level already set
+			if t.Level != -1 {
+				continue
+			}
+
+			parents := GetParents(rc, t)
+			ok := true
+			for _, p := range parents {
+				// * skip if the parent doesn't have a level yet
+				// * skip if the parent has a level equal to or greater than the current one (this happens when
+				//   we have just set a level to a task in this same level loop)
+				if p.Level == -1 || p.Level >= level {
+					ok = false
+				}
+			}
+			if ok {
+				t.Level = level
+				c++
+			}
+		}
+
+		// if no tasks were updated in this level we can stop here
+		if c == 0 {
+			break
+		}
+		level++
+	}
+	for _, t := range rc.Tasks {
+		if t.Level == -1 {
+			return errors.Errorf("circular dependency detected")
+		}
+	}
+	return nil
+}
+
+// GetParents returns the direct parents of task.
+func GetParents(rc *types.RunConfig, task *types.RunConfigTask) []*types.RunConfigTask {
+	parents := []*types.RunConfigTask{}
+	for _, t := range rc.Tasks {
+		isParent := false
+		for _, d := range task.Depends {
+			if d.TaskID == t.ID {
+				isParent = true
+			}
+		}
+		if isParent {
+			parents = append(parents, t)
+		}
+	}
+	return parents
+}
+
+// GetAllParents returns all the parents (both direct parents and ancestors) of task.
+// In case of a circular dependency it won't loop forever, but it will also
+// return task as a parent of itself.
+func GetAllParents(rc *types.RunConfig, task *types.RunConfigTask) []*types.RunConfigTask {
+	pMap := map[string]*types.RunConfigTask{}
+	nextParents := GetParents(rc, task)
+
+	for len(nextParents) > 0 {
+		parents := nextParents
+		nextParents = []*types.RunConfigTask{}
+		for _, parent := range parents {
+			if _, ok := pMap[parent.ID]; ok {
+				continue
+			}
+			pMap[parent.ID] = parent
+			nextParents = append(nextParents, GetParents(rc, parent)...)
+		}
+	}
+
+	parents := make([]*types.RunConfigTask, 0, len(pMap))
+	for _, v := range pMap {
+		parents = append(parents, v)
+	}
+	return parents
+}
diff --git a/internal/runconfig/runconfig_test.go b/internal/runconfig/runconfig_test.go
new file mode 100644
index 0000000..28659e4
--- /dev/null
+++ b/internal/runconfig/runconfig_test.go
@@ -0,0 +1,684 @@
+// Copyright 2019 Sorint.lab
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
+// See the License for the specific language governing permissions and
+// limitations under the License.
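The tests below build dependency graphs by hand. For quick reference, this is the behavior they assert, shown on a minimal three-task chain (a sketch using only types from this commit; not test code from the patch):

package example // hypothetical, not part of this commit

import (
	"github.com/sorintlab/agola/internal/runconfig"
	"github.com/sorintlab/agola/internal/services/runservice/types"
)

func levelsExample() {
	rc := &types.RunConfig{Tasks: map[string]*types.RunConfigTask{
		"a": {ID: "a"},                                                       // root task
		"b": {ID: "b", Depends: []*types.RunConfigTaskDepend{{TaskID: "a"}}}, // b -> a
		"c": {ID: "c", Depends: []*types.RunConfigTaskDepend{{TaskID: "b"}}}, // c -> b
	}}
	_ = runconfig.GenTasksLevels(rc) // nil error: the graph is acyclic
	// resulting levels: a=0, b=1, c=2
	// GetAllParents(rc, rc.Tasks["c"]) returns b and a (order not guaranteed)
}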
+ +package runconfig + +import ( + "fmt" + "reflect" + "testing" + + "github.com/pkg/errors" + "github.com/sorintlab/agola/internal/services/runservice/types" + "github.com/sorintlab/agola/internal/util" +) + +func TestGenTasksLevels(t *testing.T) { + type task struct { + ID string + Level int + Depends []*types.RunConfigTaskDepend + } + tests := []struct { + name string + in []task + out []task + err error + }{ + { + name: "test single task", + in: []task{ + { + ID: "1", + Level: -1, + }, + }, + out: []task{ + { + ID: "1", + Level: 0, + }, + }, + }, + { + name: "test multiple root tasks", + in: []task{ + { + ID: "1", + Level: -1, + }, + { + ID: "2", + Level: -1, + }, + }, + out: []task{ + { + ID: "1", + Level: 0, + }, + { + ID: "2", + Level: 0, + }, + }, + }, + { + name: "test dependency between two tasks", + in: []task{ + { + ID: "1", + Level: -1, + }, + { + ID: "2", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "1", + }, + }, + }, + }, + out: []task{ + { + ID: "1", + Level: 0, + }, + { + ID: "2", + Level: 1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "1", + }, + }, + }, + }, + }, + { + name: "Test circular dependency between two tasks: a -> b -> a", + in: []task{ + { + ID: "1", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "2", + }, + }, + }, + { + ID: "2", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "1", + }, + }, + }, + }, + err: fmt.Errorf("circular dependency detected"), + }, + { + name: "Test circular dependency between 3 tasks: a -> b -> c -> a", + in: []task{ + { + ID: "1", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "2", + }, + }, + }, + { + ID: "2", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "3", + }, + }, + }, + { + ID: "3", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "1", + }, + }, + }, + }, + err: fmt.Errorf("circular dependency detected"), + }, + { + name: "Test circular dependency between 3 tasks: a -> b -> c -> b", + in: []task{ + { + ID: "1", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "2", + }, + }, + }, + { + ID: "2", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "3", + }, + }, + }, + { + ID: "3", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "2", + }, + }, + }, + }, + err: fmt.Errorf("circular dependency detected"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + inRunConfig := &types.RunConfig{Tasks: map[string]*types.RunConfigTask{}} + for _, t := range tt.in { + inRunConfig.Tasks[t.ID] = &types.RunConfigTask{ + ID: t.ID, + Level: t.Level, + Depends: t.Depends, + } + + } + outRunConfig := &types.RunConfig{Tasks: map[string]*types.RunConfigTask{}} + for _, t := range tt.out { + outRunConfig.Tasks[t.ID] = &types.RunConfigTask{ + ID: t.ID, + Level: t.Level, + Depends: t.Depends, + } + + } + if err := GenTasksLevels(inRunConfig); err != nil { + if err.Error() != tt.err.Error() { + t.Fatalf("got error: %v, want error: %v", err, tt.err) + } + return + } + if tt.err != nil { + t.Fatalf("got nil error, want error: %v", tt.err) + } + if !reflect.DeepEqual(inRunConfig.Tasks, outRunConfig.Tasks) { + t.Fatalf("got %s, expected %s", util.Dump(inRunConfig), 
util.Dump(outRunConfig)) + } + }) + } +} + +func TestGetAllParents(t *testing.T) { + type task struct { + ID string + Level int + Depends []*types.RunConfigTaskDepend + } + tests := []struct { + name string + in []task + out map[string][]string + }{ + { + name: "test single task", + in: []task{ + { + ID: "1", + Level: -1, + }, + }, + out: map[string][]string{ + "1": []string{}, + }, + }, + { + name: "test multiple root tasks", + in: []task{ + { + ID: "1", + Level: -1, + }, + { + ID: "2", + Level: -1, + }, + }, + out: map[string][]string{ + "1": []string{}, + "2": []string{}, + }, + }, + { + name: "test dependency from a task to itself", + in: []task{ + { + ID: "1", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "1", + }, + }, + }, + }, + out: map[string][]string{ + "1": []string{"1"}, + }, + }, + { + name: "test dependency between two tasks", + in: []task{ + { + ID: "1", + Level: -1, + }, + { + ID: "2", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "1", + }, + }, + }, + }, + out: map[string][]string{ + "1": []string{}, + "2": []string{"1"}, + }, + }, + { + name: "Test dependency between 5 tasks: a -> (b, c) -> (d, e)", + in: []task{ + { + ID: "1", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "2", + }, + &types.RunConfigTaskDepend{ + TaskID: "3", + }, + }, + }, + { + ID: "2", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "4", + }, + }, + }, + { + ID: "3", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "5", + }, + }, + }, + { + ID: "4", + Level: -1, + }, + { + ID: "5", + Level: -1, + }, + }, + out: map[string][]string{ + "1": []string{"2", "3", "4", "5"}, + "2": []string{"4"}, + "3": []string{"5"}, + "4": []string{}, + "5": []string{}, + }, + }, + { + name: "Test circular dependency between two tasks: a -> b -> a", + in: []task{ + { + ID: "1", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "2", + }, + }, + }, + { + ID: "2", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "1", + }, + }, + }, + }, + out: map[string][]string{ + "1": []string{"2", "1"}, + "2": []string{"1", "2"}, + }, + }, + { + name: "Test circular dependency between 3 tasks: a -> b -> c -> a", + in: []task{ + { + ID: "1", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "2", + }, + }, + }, + { + ID: "2", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "3", + }, + }, + }, + { + ID: "3", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "1", + }, + }, + }, + }, + out: map[string][]string{ + "1": []string{"2", "3", "1"}, + "2": []string{"3", "1", "2"}, + "3": []string{"1", "2", "3"}, + }, + }, + { + name: "Test circular dependency between 3 tasks: a -> b -> c -> b", + in: []task{ + { + ID: "1", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "2", + }, + }, + }, + { + ID: "2", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "3", + }, + }, + }, + { + ID: "3", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "2", + }, + }, + }, + }, + out: map[string][]string{ + "1": []string{"2", "3"}, + "2": []string{"3", "2"}, + "3": []string{"2", "3"}, 
+ }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + inRunConfig := &types.RunConfig{Tasks: map[string]*types.RunConfigTask{}} + for _, t := range tt.in { + inRunConfig.Tasks[t.ID] = &types.RunConfigTask{ + ID: t.ID, + Level: t.Level, + Depends: t.Depends, + } + + } + + for _, task := range inRunConfig.Tasks { + allParents := GetAllParents(inRunConfig, task) + + allParentsList := []string{} + for _, p := range allParents { + allParentsList = append(allParentsList, p.ID) + } + if !util.CompareStringSliceNoOrder(tt.out[task.ID], allParentsList) { + t.Fatalf("task: %s, got %s, expected %s", task.ID, util.Dump(allParentsList), util.Dump(tt.out[task.ID])) + } + } + }) + } +} + +func TestCheckRunConfig(t *testing.T) { + type task struct { + ID string + Level int + Depends []*types.RunConfigTaskDepend + } + tests := []struct { + name string + in []task + err error + }{ + { + name: "test single task", + in: []task{ + { + ID: "1", + Level: -1, + }, + }, + }, + { + name: "test multiple root tasks", + in: []task{ + { + ID: "1", + Level: -1, + }, + { + ID: "2", + Level: -1, + }, + }, + }, + { + name: "test dependency between two tasks", + in: []task{ + { + ID: "1", + Level: -1, + }, + { + ID: "2", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "1", + }, + }, + }, + }, + }, + { + name: "Test circular dependency between two tasks: a -> b -> a", + in: []task{ + { + ID: "1", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "2", + }, + }, + }, + { + ID: "2", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "1", + }, + }, + }, + }, + err: &util.Errors{ + Errs: []error{ + errors.Errorf("circular dependency between task %q and tasks %q", "task1", "task2"), + errors.Errorf("circular dependency between task %q and tasks %q", "task2", "task1"), + }, + }, + }, + { + name: "Test circular dependency between 3 tasks: a -> b -> c -> a", + in: []task{ + { + ID: "1", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "2", + }, + }, + }, + { + ID: "2", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "3", + }, + }, + }, + { + ID: "3", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "1", + }, + }, + }, + }, + err: &util.Errors{ + Errs: []error{ + errors.Errorf("circular dependency between task %q and tasks %q", "task1", "task3"), + errors.Errorf("circular dependency between task %q and tasks %q", "task2", "task1"), + errors.Errorf("circular dependency between task %q and tasks %q", "task3", "task2"), + }, + }, + }, + { + name: "Test circular dependency between 3 tasks: a -> b -> c -> b", + in: []task{ + { + ID: "1", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "2", + }, + }, + }, + { + ID: "2", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "3", + }, + }, + }, + { + ID: "3", + Level: -1, + Depends: []*types.RunConfigTaskDepend{ + &types.RunConfigTaskDepend{ + TaskID: "2", + }, + }, + }, + }, + err: &util.Errors{ + Errs: []error{ + errors.Errorf("circular dependency between task %q and tasks %q", "task2", "task3"), + errors.Errorf("circular dependency between task %q and tasks %q", "task3", "task2"), + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + inRunConfig := &types.RunConfig{Tasks: 
map[string]*types.RunConfigTask{}} + for _, t := range tt.in { + inRunConfig.Tasks[t.ID] = &types.RunConfigTask{ + Name: fmt.Sprintf("task%s", t.ID), + ID: t.ID, + Level: t.Level, + Depends: t.Depends, + } + + } + + if err := CheckRunConfig(inRunConfig); err != nil { + if errs, ok := err.(*util.Errors); ok { + if !errs.Equal(tt.err) { + t.Fatalf("got error: %v, want error: %v", err, tt.err) + } + } else { + if err.Error() != tt.err.Error() { + t.Fatalf("got error: %v, want error: %v", err, tt.err) + } + } + return + } + if tt.err != nil { + t.Fatalf("got nil error, want error: %v", tt.err) + } + }) + } +} diff --git a/internal/services/runservice/executor/api.go b/internal/services/runservice/executor/api.go new file mode 100644 index 0000000..c60a603 --- /dev/null +++ b/internal/services/runservice/executor/api.go @@ -0,0 +1,217 @@ +// Copyright 2019 Sorint.lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied +// See the License for the specific language governing permissions and +// limitations under the License. + +package executor + +import ( + "bufio" + "encoding/json" + "io" + "net/http" + "os" + "strconv" + "time" + + "github.com/pkg/errors" + "github.com/sorintlab/agola/internal/services/runservice/types" + "go.uber.org/zap" +) + +type taskSubmissionHandler struct { + c chan<- *types.ExecutorTask +} + +func NewTaskSubmissionHandler(c chan<- *types.ExecutorTask) *taskSubmissionHandler { + return &taskSubmissionHandler{c: c} +} + +func (h *taskSubmissionHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + var et *types.ExecutorTask + d := json.NewDecoder(r.Body) + + if err := d.Decode(&et); err != nil { + http.Error(w, "", http.StatusInternalServerError) + return + } + + h.c <- et +} + +type logsHandler struct { + log *zap.SugaredLogger + e *Executor +} + +func NewLogsHandler(logger *zap.Logger, e *Executor) *logsHandler { + return &logsHandler{ + log: logger.Sugar(), + e: e, + } +} + +func (h *logsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + // TODO(sgotti) Check authorized call from scheduler + + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + + taskID := r.URL.Query().Get("taskid") + if taskID == "" { + http.Error(w, "", http.StatusBadRequest) + return + } + s := r.URL.Query().Get("step") + if s == "" { + http.Error(w, "", http.StatusBadRequest) + return + } + step, err := strconv.Atoi(s) + if err != nil { + http.Error(w, "", http.StatusBadRequest) + return + } + follow := false + _, ok := r.URL.Query()["follow"] + if ok { + follow = true + } + + if err := h.readTaskLogs(taskID, step, w, follow); err != nil { + h.log.Errorf("err: %+v", err) + } +} + +func (h *logsHandler) readTaskLogs(taskID string, step int, w http.ResponseWriter, follow bool) error { + logPath := h.e.logPath(taskID, step) + return h.readLogs(taskID, step, logPath, w, follow) +} + +func (h *logsHandler) readLogs(taskID string, step int, logPath string, w http.ResponseWriter, follow bool) error { + f, err := os.Open(logPath) + if err != nil { + if os.IsNotExist(err) { + http.Error(w, "", 
http.StatusNotFound)
+		} else {
+			http.Error(w, "", http.StatusInternalServerError)
+		}
+		return err
+	}
+	defer f.Close()
+
+	w.Header().Set("Content-Type", "text/event-stream")
+	w.Header().Set("Cache-Control", "no-cache")
+	w.Header().Set("Connection", "keep-alive")
+
+	br := bufio.NewReader(f)
+
+	var flusher http.Flusher
+	if fl, ok := w.(http.Flusher); ok {
+		flusher = fl
+	}
+	stop := false
+	flushstop := false
+	for {
+		if stop {
+			return nil
+		}
+		data, err := br.ReadBytes('\n')
+		if err != nil {
+			if err != io.EOF {
+				return err
+			}
+			if !flushstop && follow {
+				if _, err := f.Seek(-int64(len(data)), io.SeekCurrent); err != nil {
+					return errors.Wrapf(err, "failed to seek in log file %q", logPath)
+				}
+				// check if the step is finished, if so flush until EOF and stop
+				rt, ok := h.e.runningTasks.get(taskID)
+				if !ok {
+					flushstop = true
+				} else {
+					rt.Lock()
+					if rt.et.Status.Steps[step].Phase.IsFinished() {
+						flushstop = true
+					}
+					rt.Unlock()
+				}
+				// TODO(sgotti) use inotify/fswatcher?
+				time.Sleep(500 * time.Millisecond)
+				continue
+			} else {
+				stop = true
+			}
+		}
+		if _, err := w.Write(data); err != nil {
+			return err
+		}
+		if flusher != nil {
+			flusher.Flush()
+		}
+	}
+}
+
+type archivesHandler struct {
+	e *Executor
+}
+
+func NewArchivesHandler(e *Executor) *archivesHandler {
+	return &archivesHandler{e: e}
+}
+
+func (h *archivesHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	// TODO(sgotti) Check authorized call from scheduler
+
+	taskID := r.URL.Query().Get("taskid")
+	if taskID == "" {
+		http.Error(w, "", http.StatusBadRequest)
+		return
+	}
+	s := r.URL.Query().Get("step")
+	if s == "" {
+		http.Error(w, "", http.StatusBadRequest)
+		return
+	}
+	step, err := strconv.Atoi(s)
+	if err != nil {
+		http.Error(w, "", http.StatusBadRequest)
+		return
+	}
+
+	w.Header().Set("Cache-Control", "no-cache")
+
+	if err := h.readArchive(taskID, step, w); err != nil {
+		if os.IsNotExist(err) {
+			http.Error(w, "", http.StatusNotFound)
+		} else {
+			http.Error(w, "", http.StatusInternalServerError)
+		}
+		return
+	}
+}
+
+func (h *archivesHandler) readArchive(taskID string, step int, w io.Writer) error {
+	archivePath := h.e.archivePath(taskID, step)
+
+	f, err := os.Open(archivePath)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	br := bufio.NewReader(f)
+
+	_, err = io.Copy(w, br)
+	return err
+}
diff --git a/internal/services/runservice/executor/driver/docker.go b/internal/services/runservice/executor/driver/docker.go
new file mode 100644
index 0000000..94be9c3
--- /dev/null
+++ b/internal/services/runservice/executor/driver/docker.go
@@ -0,0 +1,468 @@
+// Copyright 2019 Sorint.lab
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
+// See the License for the specific language governing permissions and
+// limitations under the License.
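Before the implementation, a usage sketch of the docker driver API. NewDockerDriver and NewPod are defined below; the PodConfig and ContainerConfig field names are inferred from how NewPod reads them (their declarations live in driver.go, which is not shown in this hunk), so treat every literal here as an assumption:

package example // hypothetical, not part of this commit

import (
	"context"

	"github.com/sorintlab/agola/internal/services/runservice/executor/driver"
	"go.uber.org/zap"
)

func startPod(ctx context.Context) (driver.Pod, error) {
	logger, _ := zap.NewDevelopment()
	// second argument: the host dir that NewPod bind mounts into every container
	d, err := driver.NewDockerDriver(logger, "/tmp/agola/initvol")
	if err != nil {
		return nil, err
	}
	return d.NewPod(ctx, &driver.PodConfig{
		Labels:        map[string]string{"taskid": "some-task-id"}, // prefixed with podLabelPrefix by NewPod
		InitVolumeDir: "/tmp/agola",
		Containers: []*driver.ContainerConfig{{ // slice element type is an assumption
			Image: "busybox",       // always pulled before starting, see NewPod
			Cmd:   []string{"cat"}, // used as the container entrypoint
		}},
	})
}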
+
+package driver
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/pkg/errors"
+
+	"github.com/docker/docker/api/types"
+	"github.com/docker/docker/api/types/container"
+	"github.com/docker/docker/api/types/filters"
+	"github.com/docker/docker/client"
+	"github.com/docker/docker/pkg/archive"
+	"github.com/docker/docker/pkg/stdcopy"
+	uuid "github.com/satori/go.uuid"
+	"go.uber.org/zap"
+)
+
+type DockerDriver struct {
+	logger            *zap.Logger
+	client            *client.Client
+	initVolumeHostDir string
+}
+
+func NewDockerDriver(logger *zap.Logger, initVolumeHostDir string) (*DockerDriver, error) {
+	cli, err := client.NewEnvClient()
+	if err != nil {
+		return nil, err
+	}
+	return &DockerDriver{
+		logger:            logger,
+		client:            cli,
+		initVolumeHostDir: initVolumeHostDir,
+	}, nil
+}
+
+// CopyToolbox is a hack needed when running the executor inside a docker
+// container. It copies the agola-toolbox binaries from the container to a
+// host path so it can be bind mounted to the other containers
+func (d *DockerDriver) CopyToolbox(ctx context.Context, toolboxPath string) error {
+	// by default always try to pull the image so we are sure only authorized users can fetch them
+	// see https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#alwayspullimages
+	reader, err := d.client.ImagePull(ctx, "busybox", types.ImagePullOptions{})
+	if err != nil {
+		return err
+	}
+	io.Copy(os.Stdout, reader)
+
+	resp, err := d.client.ContainerCreate(ctx, &container.Config{
+		Entrypoint: []string{"cat"},
+		Image:      "busybox",
+		Tty:        true,
+	}, &container.HostConfig{
+		Binds: []string{fmt.Sprintf("%s:%s", d.initVolumeHostDir, "/tmp/agola")},
+	}, nil, "")
+	if err != nil {
+		return err
+	}
+
+	containerID := resp.ID
+
+	if err := d.client.ContainerStart(ctx, containerID, types.ContainerStartOptions{}); err != nil {
+		return err
+	}
+
+	srcInfo, err := archive.CopyInfoSourcePath(toolboxPath, false)
+	if err != nil {
+		return err
+	}
+
+	srcArchive, err := archive.TarResource(srcInfo)
+	if err != nil {
+		return err
+	}
+	defer srcArchive.Close()
+
+	options := types.CopyToContainerOptions{
+		AllowOverwriteDirWithFile: false,
+		CopyUIDGID:                false,
+	}
+
+	if err := d.client.CopyToContainer(ctx, containerID, "/tmp/agola", srcArchive, options); err != nil {
+		return err
+	}
+
+	// ignore remove error
+	d.client.ContainerRemove(ctx, containerID, types.ContainerRemoveOptions{Force: true})
+
+	return nil
+}
+
+func (d *DockerDriver) NewPod(ctx context.Context, podConfig *PodConfig) (Pod, error) {
+	if len(podConfig.Containers) == 0 {
+		return nil, errors.Errorf("empty container config")
+	}
+
+	containerConfig := podConfig.Containers[0]
+
+	// by default always try to pull the image so we are sure only authorized users can fetch them
+	// see https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#alwayspullimages
+	reader, err := d.client.ImagePull(ctx, containerConfig.Image, types.ImagePullOptions{})
+	if err != nil {
+		return nil, err
+	}
+	io.Copy(os.Stdout, reader)
+
+	podID := uuid.NewV4().String()
+
+	labels := map[string]string{}
+	// prepend the podLabelPrefix to the labels' keys
+	for k, v := range podConfig.Labels {
+		labels[podLabelPrefix+k] = v
+	}
+	labels[agolaLabelKey] = agolaLabelValue
+	labels[podIDKey] = podID
+
+	containerLabels := map[string]string{}
+	for k, v := range labels {
+		containerLabels[k] = v
+	}
+	containerLabels[containerIndexKey] = "0"
+
+	resp, err := d.client.ContainerCreate(ctx, 
&container.Config{
+		Entrypoint: containerConfig.Cmd,
+		Env:        makeEnv(containerConfig.Env),
+		WorkingDir: containerConfig.WorkingDir,
+		Image:      containerConfig.Image,
+		Tty:        true,
+		Labels:     containerLabels,
+	}, &container.HostConfig{
+		Binds:         []string{fmt.Sprintf("%s:%s", d.initVolumeHostDir, podConfig.InitVolumeDir)},
+		ReadonlyPaths: []string{fmt.Sprintf("%s:%s", d.initVolumeHostDir, podConfig.InitVolumeDir)},
+	}, nil, "")
+	if err != nil {
+		return nil, err
+	}
+
+	containerID := resp.ID
+
+	if err := d.client.ContainerStart(ctx, containerID, types.ContainerStartOptions{}); err != nil {
+		return nil, err
+	}
+
+	args := filters.NewArgs()
+	for k, v := range labels {
+		args.Add("label", fmt.Sprintf("%s=%s", k, v))
+	}
+
+	containers, err := d.client.ContainerList(ctx,
+		types.ContainerListOptions{
+			Filters: args,
+		})
+	if err != nil {
+		return nil, err
+	}
+	if len(containers) == 0 {
+		return nil, errors.Errorf("no container with id %s", containerID)
+	}
+
+	return &DockerPod{
+		id:         podID,
+		client:     d.client,
+		containers: containers,
+	}, nil
+}
+
+func (d *DockerDriver) GetPodsByLabels(ctx context.Context, labels map[string]string, all bool) ([]Pod, error) {
+	args := filters.NewArgs()
+	// search labels adding the podLabelPrefix
+	for k, v := range labels {
+		args.Add("label", fmt.Sprintf("%s%s=%s", podLabelPrefix, k, v))
+	}
+
+	containers, err := d.client.ContainerList(ctx,
+		types.ContainerListOptions{
+			Filters: args,
+			All:     all,
+		})
+	if err != nil {
+		return nil, err
+	}
+
+	podsMap := map[string]*DockerPod{}
+	for _, container := range containers {
+		podID, ok := container.Labels[podIDKey]
+		if !ok {
+			// skip container
+			continue
+		}
+		if pod, ok := podsMap[podID]; !ok {
+			podsMap[podID] = &DockerPod{
+				id:         podID,
+				client:     d.client,
+				containers: []types.Container{container},
+			}
+		} else {
+			pod.containers = append(pod.containers, container)
+		}
+	}
+
+	// put the containers in the right order based on their containerIndexKey label value
+	for _, container := range containers {
+		podID, ok := container.Labels[podIDKey]
+		if !ok {
+			// skip container
+			continue
+		}
+		cIndexStr, ok := container.Labels[containerIndexKey]
+		if !ok {
+			// remove pod since some of its containers don't have the right labels
+			delete(podsMap, podID)
+			continue
+		}
+		cIndex, err := strconv.Atoi(cIndexStr)
+		if err != nil {
+			// remove pod since some of its containers don't have the right labels
+			delete(podsMap, podID)
+			continue
+		}
+		pod, ok := podsMap[podID]
+		if !ok {
+			// the pod was removed in a previous iteration
+			continue
+		}
+		if cIndex < 0 || cIndex >= len(pod.containers) {
+			// remove pod since its container indexes aren't consistent
+			delete(podsMap, podID)
+			continue
+		}
+		pod.containers[cIndex] = container
+
+		// take the pod labels from the container with index 0
+		if cIndex == 0 {
+			podLabels := map[string]string{}
+			for labelName, labelValue := range container.Labels {
+				if strings.HasPrefix(labelName, podLabelPrefix) {
+					podLabels[strings.TrimPrefix(labelName, podLabelPrefix)] = labelValue
+				}
+			}
+			pod.labels = podLabels
+		}
+	}
+
+	pods := make([]Pod, 0, len(podsMap))
+	for _, pod := range podsMap {
+		pods = append(pods, pod)
+	}
+	return pods, nil
+}
+
+func podLabelsFromContainer(containerLabels map[string]string) map[string]string {
+	labels := map[string]string{}
+	for k, v := range containerLabels {
+		if strings.HasPrefix(k, podLabelPrefix) {
+			labels[strings.TrimPrefix(k, podLabelPrefix)] = v
+		}
+	}
+	return labels
+}
+
+func (d *DockerDriver) GetPodByID(ctx context.Context, podID string) (Pod, error) {
+	args := filters.NewArgs()
+	// the pod id is stored as a label on every container of the pod
+	args.Add("label", fmt.Sprintf("%s=%s", podIDKey, podID))
+
+	containers, err := d.client.ContainerList(ctx,
+		types.ContainerListOptions{
+			Filters: args,
+		})
+	if err != nil {
+		return nil, err
+	}
+	if len(containers) == 0 {
+		return nil, errors.Errorf("no pod with id %s", podID)
+	}
+
+	return &DockerPod{
+		id:         podID,
+		labels:     podLabelsFromContainer(containers[0].Labels),
+		client:     d.client,
+		containers: containers,
+	}, nil
+}
+
+type DockerPod struct {
+	id         string
+	client     *client.Client
+	labels     map[string]string
+	containers []types.Container
+}
+
+func (dp *DockerPod) ID() string {
+	return dp.id
+}
+
+func (dp *DockerPod) Labels() map[string]string {
+	return dp.labels
+}
+
+func (dp *DockerPod) Stop(ctx context.Context) error {
+	d := 1 * time.Second
+	errs := []error{}
+	for _, container := range dp.containers {
+		if err := dp.client.ContainerStop(ctx, container.ID, &d); err != nil {
+			errs = append(errs, err)
+		}
+	}
+	if len(errs) != 0 {
+		return errors.Errorf("stop errors: %v", errs)
+	}
+	return nil
+}
+
+func (dp *DockerPod) Remove(ctx context.Context) error {
+	errs := []error{}
+	for _, container := range dp.containers {
+		if err := dp.client.ContainerRemove(ctx, container.ID, types.ContainerRemoveOptions{Force: true}); err != nil {
+			errs = append(errs, err)
+		}
+	}
+	if len(errs) != 0 {
+		return errors.Errorf("remove errors: %v", errs)
+	}
+	return nil
+}
+
+func (dp *DockerPod) CopyTo(ctx context.Context, srcPath, dstPath string) error {
+	srcInfo, err := archive.CopyInfoSourcePath(srcPath, false)
+	if err != nil {
+		return err
+	}
+
+	srcArchive, err := archive.TarResource(srcInfo)
+	if err != nil {
+		return err
+	}
+	defer srcArchive.Close()
+
+	options := types.CopyToContainerOptions{
+		AllowOverwriteDirWithFile: false,
+		CopyUIDGID:                false,
+	}
+
+	return dp.client.CopyToContainer(ctx, dp.containers[0].ID, dstPath, srcArchive, options)
+}
+
+type DockerContainerExec struct {
+	execID string
+	hresp  *types.HijackedResponse
+	client *client.Client
+	endCh  chan error
+
+	stdin io.WriteCloser
+}
+
+// Stdin is a wrapped HijackedResponse implementing io.WriteCloser so users can
+// easily close stdin. Internally it will close only the write side of the conn.
+type Stdin struct { + hresp *types.HijackedResponse +} + +func (s *Stdin) Write(p []byte) (int, error) { + return s.hresp.Conn.Write(p) +} + +func (s *Stdin) Close() error { + return s.hresp.CloseWrite() +} + +func (dc *DockerPod) Exec(ctx context.Context, execConfig *ExecConfig) (ContainerExec, error) { + endCh := make(chan error) + + dockerExecConfig := types.ExecConfig{ + Cmd: execConfig.Cmd, + //Cmd: []string{"/bin/bash", "-s"}, + Env: makeEnv(execConfig.Env), + Tty: execConfig.Tty, + WorkingDir: execConfig.WorkingDir, + AttachStdin: true, + AttachStdout: execConfig.Stdout != nil, + AttachStderr: execConfig.Stderr != nil, + User: execConfig.User, + } + + response, err := dc.client.ContainerExecCreate(ctx, dc.containers[0].ID, dockerExecConfig) + if err != nil { + return nil, err + } + execStartCheck := types.ExecStartCheck{ + Detach: dockerExecConfig.Detach, + Tty: dockerExecConfig.Tty, + } + hresp, err := dc.client.ContainerExecAttach(ctx, response.ID, execStartCheck) + if err != nil { + return nil, err + } + + stdout := execConfig.Stdout + stderr := execConfig.Stderr + if execConfig.Stdout == nil { + stdout = ioutil.Discard + } + if execConfig.Stderr == nil { + stderr = ioutil.Discard + } + + // copy both stdout and stderr to out file + go func() { + var err error + if execConfig.Tty { + _, err = io.Copy(stdout, hresp.Reader) + } else { + _, err = stdcopy.StdCopy(stdout, stderr, hresp.Reader) + } + endCh <- err + }() + + stdin := &Stdin{ + hresp: &hresp, + } + + return &DockerContainerExec{ + execID: response.ID, + hresp: &hresp, + client: dc.client, + stdin: stdin, + endCh: endCh, + }, nil +} + +func (e *DockerContainerExec) Wait(ctx context.Context) (int, error) { + // ignore error, we'll use the exit code of the exec + <-e.endCh + + resp, err := e.client.ContainerExecInspect(ctx, e.execID) + if err != nil { + return -1, err + } + exitCode := resp.ExitCode + + e.hresp.Close() + + return exitCode, nil +} + +func (e *DockerContainerExec) Stdin() io.WriteCloser { + return e.stdin +} + +func makeEnv(env map[string]string) []string { + envList := make([]string, 0, len(env)) + for k, v := range env { + envList = append(envList, fmt.Sprintf("%s=%s", k, v)) + } + + return envList +} diff --git a/internal/services/runservice/executor/driver/docker_test.go b/internal/services/runservice/executor/driver/docker_test.go new file mode 100644 index 0000000..77518cb --- /dev/null +++ b/internal/services/runservice/executor/driver/docker_test.go @@ -0,0 +1,242 @@ +// Copyright 2019 Sorint.lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied +// See the License for the specific language governing permissions and +// limitations under the License. 
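Putting the exec pieces above together, the intended flow is: create the exec, write to its stdin, close the write side, then collect the exit code. A minimal sketch, written as if it lived in package driver (bytes, fmt and io imports assumed), against a pod created with a long-running entrypoint such as "cat":

// echoInPod runs "cat" in the pod's first container, feeds it one line of
// stdin and returns what came back on stdout/stderr.
func echoInPod(ctx context.Context, pod Pod) (string, error) {
	var out bytes.Buffer
	ce, err := pod.Exec(ctx, &ExecConfig{
		Cmd:    []string{"/bin/sh", "-c", "cat"},
		Stdout: &out,
		Stderr: &out,
	})
	if err != nil {
		return "", err
	}
	if _, err := io.WriteString(ce.Stdin(), "hello\n"); err != nil {
		return "", err
	}
	ce.Stdin().Close() // closes only the write side of the hijacked conn
	code, err := ce.Wait(ctx)
	if err != nil {
		return "", err
	}
	if code != 0 {
		return "", fmt.Errorf("exec exited with code %d", code)
	}
	return out.String(), nil
}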
+
+package driver
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"strings"
+	"testing"
+	"unicode"
+
+	slog "github.com/sorintlab/agola/internal/log"
+
+	"github.com/google/go-cmp/cmp"
+	"go.uber.org/zap"
+	"go.uber.org/zap/zapcore"
+)
+
+var level = zap.NewAtomicLevelAt(zapcore.InfoLevel)
+var logger = slog.New(level)
+var log = logger.Sugar()
+
+func parseEnv(envvar string) (string, string, error) {
+	// trim white spaces at the start
+	envvar = strings.TrimLeftFunc(envvar, unicode.IsSpace)
+	arr := strings.SplitN(envvar, "=", 2)
+	varname := arr[0]
+	if varname == "" {
+		return "", "", fmt.Errorf("invalid environment variable definition: %s", envvar)
+	}
+	if len(arr) > 1 {
+		if arr[1] == "" {
+			return "", "", fmt.Errorf("invalid environment variable definition: %s", envvar)
+		}
+		return varname, arr[1], nil
+	}
+	return varname, "", nil
+}
+
+func parseEnvs(r io.Reader) (map[string]string, error) {
+	envs := map[string]string{}
+
+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		envname, envvalue, err := parseEnv(scanner.Text())
+		if err != nil {
+			return nil, err
+		}
+		envs[envname] = envvalue
+	}
+	if err := scanner.Err(); err != nil {
+		return nil, err
+	}
+
+	return envs, nil
+}
+
+func TestPod(t *testing.T) {
+	if os.Getenv("SKIP_DOCKER_TESTS") == "1" {
+		t.Skip("skipping since env var SKIP_DOCKER_TESTS is 1")
+	}
+
+	dir, err := ioutil.TempDir("", "agola")
+	if err != nil {
+		t.Fatalf("unexpected err: %v", err)
+	}
+	defer os.RemoveAll(dir)
+
+	d, err := NewDockerDriver(logger, dir)
+	if err != nil {
+		t.Fatalf("unexpected err: %v", err)
+	}
+
+	ctx := context.Background()
+
+	t.Run("create a pod with one container", func(t *testing.T) {
+		pod, err := d.NewPod(ctx, &PodConfig{
+			Containers: []*ContainerConfig{
+				&ContainerConfig{
+					Cmd:   []string{"cat"},
+					Image: "busybox",
+				},
+			},
+			InitVolumeDir: "/tmp/agola",
+		})
+		if err != nil {
+			t.Fatalf("unexpected err: %v", err)
+		}
+
+		defer pod.Remove(ctx)
+	})
+
+	t.Run("execute a command inside a pod", func(t *testing.T) {
+		pod, err := d.NewPod(ctx, &PodConfig{
+			Containers: []*ContainerConfig{
+				&ContainerConfig{
+					Cmd:   []string{"cat"},
+					Image: "busybox",
+				},
+			},
+			InitVolumeDir: "/tmp/agola",
+		})
+		if err != nil {
+			t.Fatalf("unexpected err: %v", err)
+		}
+
+		ce, err := pod.Exec(ctx, &ExecConfig{
+			Cmd: []string{"ls"},
+		})
+		if err != nil {
+			t.Fatalf("unexpected err: %v", err)
+		}
+
+		ce.Stdin().Close()
+		code, err := ce.Wait(ctx)
+		if err != nil {
+			t.Fatalf("unexpected err: %v", err)
+		}
+		if code != 0 {
+			t.Fatalf("unexpected exit code: %d", code)
+		}
+
+		defer pod.Remove(ctx)
+	})
+
+	t.Run("test pod environment", func(t *testing.T) {
+		env := map[string]string{
+			"ENV01": "ENVVALUE01",
+			"ENV02": "ENVVALUE02",
+		}
+
+		pod, err := d.NewPod(ctx, &PodConfig{
+			Containers: []*ContainerConfig{
+				&ContainerConfig{
+					Cmd:   []string{"cat"},
+					Image: "busybox",
+					Env:   env,
+				},
+			},
+			InitVolumeDir: "/tmp/agola",
+		})
+		if err != nil {
+			t.Fatalf("unexpected err: %v", err)
+		}
+
+		var buf bytes.Buffer
+		ce, err := pod.Exec(ctx, &ExecConfig{
+			Cmd:    []string{"env"},
+			Stdout: &buf,
+			Stderr: &buf,
+		})
+		if err != nil {
+			t.Fatalf("unexpected err: %v", err)
+		}
+
+		ce.Stdin().Close()
+		code, err := ce.Wait(ctx)
+		if err != nil {
+			t.Fatalf("unexpected err: %v", err)
+		}
+		if code != 0 {
+			t.Fatalf("unexpected exit code: %d", code)
+		}
+
+		curEnv, err := parseEnvs(bytes.NewReader(buf.Bytes()))
+		if err != nil {
+			t.Fatalf("unexpected err: %v", err)
+		}
+
+		for n, e := range env {
+			if ce, ok := curEnv[n]; !ok {
+				t.Fatalf("missing env var %s", n)
+			} else {
+				if ce != e {
+					t.Fatalf("different env var %s value, want: %q, got %q", n, e, ce)
+				}
+			}
+		}
+
+		defer pod.Remove(ctx)
+	})
+
+	t.Run("test get pods by label", func(t *testing.T) {
+		pod, err := d.NewPod(ctx, &PodConfig{
+			Containers: []*ContainerConfig{
+				&ContainerConfig{
+					Cmd:   []string{"cat"},
+					Image: "busybox",
+				},
+			},
+			InitVolumeDir: "/tmp/agola",
+		})
+		if err != nil {
+			t.Fatalf("unexpected err: %v", err)
+		}
+
+		pods, err := d.GetPodsByLabels(ctx, map[string]string{}, true)
+		if err != nil {
+			t.Fatalf("unexpected err: %v", err)
+		}
+
+		ok := false
+		for _, p := range pods {
+			if p.ID() == pod.ID() {
+				ok = true
+				ip := pod.(*DockerPod)
+				dp := p.(*DockerPod)
+				for i, c := range dp.containers {
+					if c.ID != ip.containers[i].ID {
+						t.Fatalf("different container id, want: %s, got: %s", ip.containers[i].ID, c.ID)
+					}
+					if diff := cmp.Diff(ip.containers[i], c); diff != "" {
+						t.Error(diff)
+					}
+				}
+			}
+		}
+		if !ok {
+			t.Fatalf("pod with id %q not found", pod.ID())
+		}
+
+		defer pod.Remove(ctx)
+	})
+
+}
diff --git a/internal/services/runservice/executor/driver/driver.go b/internal/services/runservice/executor/driver/driver.go
new file mode 100644
index 0000000..9184d00
--- /dev/null
+++ b/internal/services/runservice/executor/driver/driver.go
@@ -0,0 +1,90 @@
+// Copyright 2019 Sorint.lab
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package driver
+
+import (
+	"context"
+	"io"
+)
+
+const (
+	agolaLabelKey   = "agola"
+	agolaLabelValue = "true"
+
+	podIDKey          = "podID"
+	containerIndexKey = "index"
+	taskKey           = "task"
+
+	podLabelPrefix = "podlabel_"
+)
+
+// Driver is a generic interface around the pod concept (a group of "containers"
+// sharing, at least, the same network namespace).
+// It's just tailored around the needs of an executor and should be quite generic
+// to work with multiple implementations. For example:
+// * Docker containers
+// * Kubernetes pods
+// * A Virtual Machine on which we execute multiple processes
+type Driver interface {
+	NewPod(ctx context.Context, podConfig *PodConfig) (Pod, error)
+	GetPodsByLabels(ctx context.Context, labels map[string]string, all bool) ([]Pod, error)
+	GetPodByID(ctx context.Context, podID string) (Pod, error)
+}
+
+type Pod interface {
+	// ID returns the pod id
+	ID() string
+	// Labels returns the pod labels
+	Labels() map[string]string
+	// Stop stops the pod
+	Stop(ctx context.Context) error
+	// Remove removes the pod
+	Remove(ctx context.Context) error
+	// Exec executes a command inside the first container in the Pod
+	Exec(ctx context.Context, execConfig *ExecConfig) (ContainerExec, error)
+	// CopyTo copies srcPath inside dstPath of the first container in the Pod
+	CopyTo(ctx context.Context, srcPath, dstPath string) error
+}
+
+type ContainerExec interface {
+	Stdin() io.WriteCloser
+	Wait(ctx context.Context) (int, error)
+}
+
+type PodConfig struct {
+	Containers []*ContainerConfig
+	Labels     map[string]string
+	// The container dir where the init volume will be mounted
+	InitVolumeDir string
+}
+
+type ContainerConfig struct {
+	Cmd          []string
+	Env          map[string]string
+	WorkingDir   string
+	Image        string
+	User         string
+	RegistryAuth string
+}
+
+type ExecConfig struct {
+	Cmd        []string
+	Env        map[string]string
+	WorkingDir string
+	User       string
+	Stdout     io.Writer
+	Stderr     io.Writer
+	Tty        bool
+}
diff --git a/internal/services/runservice/executor/executor.go b/internal/services/runservice/executor/executor.go
new file mode 100644
index 0000000..da157b5
--- /dev/null
+++ b/internal/services/runservice/executor/executor.go
@@ -0,0 +1,931 @@
+// Copyright 2019 Sorint.lab
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
+// See the License for the specific language governing permissions and
+// limitations under the License.
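To make the driver contract concrete before the executor code that consumes it, here is how a caller is expected to walk a pod through its lifecycle. This is a sketch against the interfaces above; the image, command and label are illustrative.

// runPodOnce creates a pod, runs one command in it and tears it down.
func runPodOnce(ctx context.Context, d driver.Driver) error {
	pod, err := d.NewPod(ctx, &driver.PodConfig{
		Containers: []*driver.ContainerConfig{
			{Image: "busybox", Cmd: []string{"cat"}},
		},
		Labels:        map[string]string{"taskid": "task01"}, // stored with the podLabelPrefix
		InitVolumeDir: "/tmp/agola",
	})
	if err != nil {
		return err
	}
	defer pod.Remove(ctx) // force-removes all of the pod's containers

	ce, err := pod.Exec(ctx, &driver.ExecConfig{Cmd: []string{"ls"}})
	if err != nil {
		return err
	}
	ce.Stdin().Close()
	_, err = ce.Wait(ctx)
	return err
}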
+ +package executor + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "net" + "net/http" + "net/url" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + "time" + + uuid "github.com/satori/go.uuid" + "github.com/sorintlab/agola/internal/common" + slog "github.com/sorintlab/agola/internal/log" + "github.com/sorintlab/agola/internal/services/config" + "github.com/sorintlab/agola/internal/services/runservice/executor/driver" + rsapi "github.com/sorintlab/agola/internal/services/runservice/scheduler/api" + "github.com/sorintlab/agola/internal/services/runservice/types" + "github.com/sorintlab/agola/internal/util" + + "github.com/gorilla/mux" + sockaddr "github.com/hashicorp/go-sockaddr" + "github.com/pkg/errors" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" +) + +var level = zap.NewAtomicLevelAt(zapcore.InfoLevel) +var logger = slog.New(level) +var log = logger.Sugar() + +const ( + defaultShell = "/bin/sh -e" + + taskIDLabel = "taskid" + + toolboxContainerDir = "/mnt/agola" +) + +var ( + toolboxContainerPath = filepath.Join(toolboxContainerDir, "/agola-toolbox") +) + +func (e *Executor) getAllPods(ctx context.Context, all bool) ([]driver.Pod, error) { + return e.driver.GetPodsByLabels(ctx, createAllLabels(), all) +} + +func (e *Executor) createFile(ctx context.Context, pod driver.Pod, command, user string, outf io.Writer) (string, error) { + cmd := []string{toolboxContainerPath, "createfile"} + + var buf bytes.Buffer + execConfig := &driver.ExecConfig{ + Cmd: cmd, + Stdout: &buf, + Stderr: outf, + User: user, + } + + ce, err := pod.Exec(ctx, execConfig) + if err != nil { + return "", err + } + + stdin := ce.Stdin() + go func() { + io.WriteString(stdin, command) + io.WriteString(stdin, "\n") + stdin.Close() + }() + + exitCode, err := ce.Wait(ctx) + if err != nil { + return "", err + } + if exitCode != 0 { + return "", errors.Errorf("toolbox exited with code: %d", exitCode) + } + + return buf.String(), nil +} + +func (e *Executor) doRunStep(ctx context.Context, s *types.RunStep, t *types.ExecutorTask, pod driver.Pod, logPath string) (int, error) { + if err := os.MkdirAll(filepath.Dir(logPath), 0770); err != nil { + return -1, err + } + outf, err := os.Create(logPath) + if err != nil { + return -1, err + } + defer outf.Close() + + shell := defaultShell + if t.Shell != "" { + shell = t.Shell + } + if s.Shell != "" { + shell = s.Shell + } + + // try to use the container specified user + user := t.Containers[0].User + if t.User != "" { + user = t.User + } + if s.User != "" { + user = s.User + } + + var cmd []string + if s.Command != "" { + filename, err := e.createFile(ctx, pod, s.Command, user, outf) + if err != nil { + return -1, errors.Errorf("create file err: %v", err) + } + + args := strings.Split(shell, " ") + cmd = append(args, filename) + } else { + cmd = strings.Split(shell, " ") + } + + // override task working dir with runstep working dir if provided + workingDir := t.WorkingDir + if s.WorkingDir != "" { + workingDir = s.WorkingDir + } + + // generate the environment using the task environment and then overriding with the runstep environment + environment := map[string]string{} + for envName, envValue := range t.Environment { + environment[envName] = envValue + } + for envName, envValue := range s.Environment { + environment[envName] = envValue + } + + execConfig := &driver.ExecConfig{ + Cmd: cmd, + Env: environment, + WorkingDir: workingDir, + User: user, + Stdout: outf, + Stderr: outf, + Tty: true, + } + + ce, err := pod.Exec(ctx, execConfig) + 
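+	// the step command (or the script file generated from it) runs in the pod's
+	// first container; with Tty set, stdout and stderr are interleaved into the
+	// step log file opened above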
if err != nil {
+		return -1, err
+	}
+
+	exitCode, err := ce.Wait(ctx)
+	if err != nil {
+		return -1, err
+	}
+
+	return exitCode, nil
+}
+
+func (e *Executor) doSaveToWorkspaceStep(ctx context.Context, s *types.SaveToWorkspaceStep, t *types.ExecutorTask, pod driver.Pod, logPath string, archivePath string) (int, error) {
+	cmd := []string{toolboxContainerPath, "archive"}
+
+	if err := os.MkdirAll(filepath.Dir(logPath), 0770); err != nil {
+		return -1, err
+	}
+	logf, err := os.Create(logPath)
+	if err != nil {
+		return -1, err
+	}
+	defer logf.Close()
+
+	if err := os.MkdirAll(filepath.Dir(archivePath), 0770); err != nil {
+		return -1, err
+	}
+	archivef, err := os.Create(archivePath)
+	if err != nil {
+		return -1, err
+	}
+	defer archivef.Close()
+
+	execConfig := &driver.ExecConfig{
+		Cmd:        cmd,
+		Env:        t.Environment,
+		WorkingDir: t.WorkingDir,
+		Stdout:     archivef,
+		Stderr:     logf,
+	}
+
+	ce, err := pod.Exec(ctx, execConfig)
+	if err != nil {
+		return -1, err
+	}
+
+	type ArchiveInfo struct {
+		SourceDir string
+		DestDir   string
+		Paths     []string
+	}
+	type Archive struct {
+		ArchiveInfos []*ArchiveInfo
+		OutFile      string
+	}
+
+	a := &Archive{
+		OutFile:      "", // use stdout
+		ArchiveInfos: make([]*ArchiveInfo, len(s.Contents)),
+	}
+
+	for i, c := range s.Contents {
+		a.ArchiveInfos[i] = &ArchiveInfo{
+			SourceDir: c.SourceDir,
+			DestDir:   c.DestDir,
+			Paths:     c.Paths,
+		}
+	}
+
+	stdin := ce.Stdin()
+	enc := json.NewEncoder(stdin)
+
+	go func() {
+		enc.Encode(a)
+		stdin.Close()
+	}()
+
+	exitCode, err := ce.Wait(ctx)
+	if err != nil {
+		return -1, err
+	}
+
+	return exitCode, nil
+}
+
+func (e *Executor) unarchive(ctx context.Context, t *types.ExecutorTask, source io.Reader, pod driver.Pod, logf io.Writer, destDir string, overwrite, removeDestDir bool) error {
+	args := []string{"--destdir", destDir}
+	if overwrite {
+		args = append(args, "--overwrite")
+	}
+	if removeDestDir {
+		args = append(args, "--remove-destdir")
+	}
+	cmd := append([]string{toolboxContainerPath, "unarchive"}, args...)
+
+	execConfig := &driver.ExecConfig{
+		Cmd:        cmd,
+		Env:        t.Environment,
+		WorkingDir: t.WorkingDir,
+		Stdout:     logf,
+		Stderr:     logf,
+	}
+
+	ce, err := pod.Exec(ctx, execConfig)
+	if err != nil {
+		return err
+	}
+
+	stdin := ce.Stdin()
+	go func() {
+		io.Copy(stdin, source)
+		stdin.Close()
+	}()
+
+	exitCode, err := ce.Wait(ctx)
+	if err != nil {
+		return err
+	}
+	if exitCode != 0 {
+		return errors.Errorf("unarchive ended with exit code %d", exitCode)
+	}
+
+	return nil
+}
+
+func (e *Executor) doRestoreWorkspaceStep(ctx context.Context, s *types.RestoreWorkspaceStep, t *types.ExecutorTask, pod driver.Pod, logPath string) (int, error) {
+	if err := os.MkdirAll(filepath.Dir(logPath), 0770); err != nil {
+		return -1, err
+	}
+	logf, err := os.Create(logPath)
+	if err != nil {
+		return -1, err
+	}
+	defer logf.Close()
+
+	// TODO(sgotti) right now we don't support duplicated files. So it's not currently possible to overwrite a file in an upper layer.
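+	// t.Workspace is a list of workspace "levels"; each level holds the groups
+	// of archives (task ID + step) produced by earlier tasks. Levels are
+	// restored in order, so files restored at a higher level are meant to sit
+	// on top of those from lower levels.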
+
+	for level, wl := range t.Workspace {
+		log.Debugf("unarchiving archives at level %d", level)
+		for _, archives := range wl {
+			for _, archive := range archives {
+				log.Debugf("unarchiving workspace at level %d, taskID: %s, step: %d", level, archive.TaskID, archive.Step)
+				resp, err := e.runserviceClient.GetArchive(ctx, archive.TaskID, archive.Step)
+				if err != nil {
+					// TODO(sgotti) retry before giving up
+					fmt.Fprintf(logf, "error reading workspace archive: %v\n", err)
+					return -1, err
+				}
+				archivef := resp.Body
+				if err := e.unarchive(ctx, t, archivef, pod, logf, s.DestDir, false, false); err != nil {
+					archivef.Close()
+					return -1, err
+				}
+				archivef.Close()
+			}
+		}
+	}
+
+	return 0, nil
+}
+
+func (e *Executor) executorIDPath() string {
+	return filepath.Join(e.c.DataDir, "id")
+}
+
+func (e *Executor) tasksDir() string {
+	return filepath.Join(e.c.DataDir, "tasks")
+}
+
+func (e *Executor) taskPath(taskID string) string {
+	return filepath.Join(e.tasksDir(), taskID)
+}
+
+func (e *Executor) logPath(taskID string, stepID int) string {
+	return filepath.Join(e.taskPath(taskID), "logs", fmt.Sprintf("%d.log", stepID))
+}
+
+func (e *Executor) archivePath(taskID string, stepID int) string {
+	return filepath.Join(e.taskPath(taskID), "archives", fmt.Sprintf("%d.tar", stepID))
+}
+
+func mkdirAllAndReplace(path string, perm os.FileMode) error {
+	// if the dir already exists, rename it out of the way instead of reusing it
+	_, err := os.Stat(path)
+	if err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	if os.IsNotExist(err) {
+		return os.MkdirAll(path, perm)
+	}
+	// TODO(sgotti) UnixNano should be enough but doesn't totally avoid name collisions.
+	return os.Rename(path, fmt.Sprintf("%s.%d", path, time.Now().UnixNano()))
+}
+
+func (e *Executor) sendExecutorStatus(ctx context.Context) error {
+	executor := &types.Executor{
+		ID:        e.id,
+		ListenURL: e.listenURL,
+	}
+
+	log.Debugf("send executor status: %s", util.Dump(executor))
+	_, err := e.runserviceClient.SendExecutorStatus(ctx, executor)
+	return err
+}
+
+func (e *Executor) sendExecutorTaskStatus(ctx context.Context, et *types.ExecutorTask) error {
+	log.Debugf("send executor task: %s, status phase: %s", et.ID, et.Status.Phase)
status: %s", et.ID, et.Status.Phase) + _, err := e.runserviceClient.SendExecutorTaskStatus(ctx, e.id, et) + return err +} + +func (e *Executor) stopTask(ctx context.Context, et *types.ExecutorTask) { + if rt, ok := e.runningTasks.get(et.ID); ok { + rt.Lock() + defer rt.Unlock() + if rt.et.Status.Phase.IsFinished() { + return + } + if rt.pod != nil { + if err := rt.pod.Stop(ctx); err != nil { + log.Errorf("err: %+v", err) + return + } + if rt.et.Status.Phase == types.ExecutorTaskPhaseNotStarted { + rt.et.Status.Phase = types.ExecutorTaskPhaseCancelled + } else { + rt.et.Status.Phase = types.ExecutorTaskPhaseStopped + } + if err := e.sendExecutorTaskStatus(ctx, et); err != nil { + log.Errorf("err: %+v", err) + return + } + } + } +} + +func (e *Executor) executeTask(ctx context.Context, et *types.ExecutorTask) { + // * save in local state that we have a running task + // * start the pod + // * then update the executortask status to in-progress + // if something fails pod will be cleaned up by the pod cleaner goroutine + // In this way we are sure that the pod cleaner will only remove pod that don't + // have an in progress running task + + if et.Status.Phase != types.ExecutorTaskPhaseNotStarted { + log.Debugf("task phase is not \"not started\"") + return + } + + rt := &runningTask{ + et: et, + } + + rt.Lock() + + if !e.runningTasks.addIfNotExists(et.ID, rt) { + log.Debugf("task %s already running", et.ID) + return + } + + defer e.runningTasks.delete(et.ID) + + rt.et.Status.Phase = types.ExecutorTaskPhaseRunning + rt.et.Status.StartTime = util.TimePtr(time.Now()) + + if err := e.sendExecutorTaskStatus(ctx, et); err != nil { + log.Errorf("err: %+v", err) + rt.Unlock() + return + } + + log.Debugf("starting pod") + podConfig := &driver.PodConfig{ + Labels: createTaskLabels(et.ID), + InitVolumeDir: toolboxContainerDir, + Containers: []*driver.ContainerConfig{ + { + Image: et.Containers[0].Image, + Cmd: []string{toolboxContainerPath, "sleeper"}, + Env: et.Containers[0].Environment, + WorkingDir: et.WorkingDir, + User: et.Containers[0].User, + }, + }, + } + pod, err := e.driver.NewPod(ctx, podConfig) + if err != nil { + log.Errorf("err: %+v", err) + rt.Unlock() + return + } + rt.pod = pod + // ignore pod stop errors + defer pod.Stop(ctx) + + log.Debugf("started pod") + + rt.Unlock() + + _, err = e.executeTaskInternal(ctx, et, pod) + + rt.Lock() + if err != nil { + log.Errorf("err: %+v", err) + rt.et.Status.Phase = types.ExecutorTaskPhaseFailed + } else { + rt.et.Status.Phase = types.ExecutorTaskPhaseSuccess + } + + rt.et.Status.EndTime = util.TimePtr(time.Now()) + + if err := e.sendExecutorTaskStatus(ctx, et); err != nil { + log.Errorf("err: %+v", err) + } + rt.Unlock() +} + +func (e *Executor) executeTaskInternal(ctx context.Context, et *types.ExecutorTask, pod driver.Pod) (int, error) { + log.Debugf("task: %s", et.TaskName) + + if err := mkdirAllAndReplace(e.taskPath(et.ID), 0770); err != nil { + return 0, err + } + + for i, step := range et.Steps { + //log.Debugf("step: %v", util.Dump(step)) + + rt, ok := e.runningTasks.get(et.ID) + if !ok { + panic(errors.Errorf("not running task for task id %s, this should never happen", et.ID)) + } + + rt.Lock() + rt.et.Status.Steps[i].Phase = types.ExecutorTaskPhaseRunning + rt.et.Status.Steps[i].StartTime = util.TimePtr(time.Now()) + if err := e.sendExecutorTaskStatus(ctx, et); err != nil { + log.Errorf("err: %+v", err) + } + rt.Unlock() + + var err error + var exitCode int + var stepName string + + switch s := step.(type) { + case *types.RunStep: + 
log.Debugf("run step: %s", util.Dump(s)) + stepName = s.Name + exitCode, err = e.doRunStep(ctx, s, et, pod, e.logPath(et.ID, i)) + + case *types.SaveToWorkspaceStep: + log.Debugf("save to workspace step: %s", util.Dump(s)) + stepName = s.Name + archivePath := e.archivePath(et.ID, i) + exitCode, err = e.doSaveToWorkspaceStep(ctx, s, et, pod, e.logPath(et.ID, i), archivePath) + + case *types.RestoreWorkspaceStep: + log.Debugf("restore workspace step: %s", util.Dump(s)) + stepName = s.Name + exitCode, err = e.doRestoreWorkspaceStep(ctx, s, et, pod, e.logPath(et.ID, i)) + + default: + return i, errors.Errorf("unknown step type: %s", util.Dump(s)) + } + + var serr error + + rt.Lock() + rt.et.Status.Steps[i].EndTime = util.TimePtr(time.Now()) + + rt.et.Status.Steps[i].Phase = types.ExecutorTaskPhaseSuccess + + if err != nil { + if rt.et.Stop { + rt.et.Status.Steps[i].Phase = types.ExecutorTaskPhaseStopped + } else { + rt.et.Status.Steps[i].Phase = types.ExecutorTaskPhaseFailed + } + serr = errors.Wrapf(err, "failed to execute step") + } else if exitCode != 0 { + rt.et.Status.Steps[i].Phase = types.ExecutorTaskPhaseFailed + rt.et.Status.Steps[i].ExitCode = exitCode + serr = errors.Errorf("step %q failed with exitcode %d", stepName, exitCode) + } + + if err := e.sendExecutorTaskStatus(ctx, et); err != nil { + log.Errorf("err: %+v", err) + } + rt.Unlock() + + if serr != nil { + return i, serr + } + } + + return 0, nil +} + +func createAllLabels() map[string]string { + return map[string]string{} +} + +func createTaskLabels(taskID string) map[string]string { + return map[string]string{ + taskIDLabel: taskID, + } +} + +func (e *Executor) podsCleanerLoop(ctx context.Context) { + for { + log.Debugf("podsCleaner") + + if err := e.podsCleaner(ctx); err != nil { + log.Errorf("err: %+v", err) + } + + select { + case <-ctx.Done(): + return + default: + } + + time.Sleep(1 * time.Second) + } +} + +func (e *Executor) podsCleaner(ctx context.Context) error { + pods, err := e.getAllPods(ctx, true) + if err != nil { + return err + } + for _, pod := range pods { + taskID, ok := pod.Labels()[taskIDLabel] + if !ok { + continue + } + if _, ok := e.runningTasks.get(taskID); !ok { + log.Infof("removing pod %s for not running task: %s", pod.ID(), taskID) + pod.Remove(ctx) + } + } + + return nil +} + +func (e *Executor) executorStatusSenderLoop(ctx context.Context) { + for { + log.Debugf("executorStatusSender") + + if err := e.sendExecutorStatus(ctx); err != nil { + log.Errorf("err: %+v", err) + } + + select { + case <-ctx.Done(): + return + default: + } + + time.Sleep(2 * time.Second) + } +} + +func (e *Executor) tasksCleanerLoop(ctx context.Context) { + for { + log.Debugf("tasksCleaner") + + if err := e.tasksCleaner(ctx); err != nil { + log.Errorf("err: %+v", err) + } + + select { + case <-ctx.Done(): + return + default: + } + + time.Sleep(2 * time.Second) + } +} + +func (e *Executor) tasksCleaner(ctx context.Context) error { + ets, _, err := e.runserviceClient.GetExecutorTasks(ctx, e.id) + if err != nil { + log.Warnf("err: %v", err) + return err + } + log.Debugf("ets: %v", util.Dump(ets)) + for _, et := range ets { + go e.cleanTask(ctx, et) + } + + return nil +} + +func (e *Executor) cleanTask(ctx context.Context, et *types.ExecutorTask) { + log.Debugf("et: %v", util.Dump(et)) + if et.Status.ExecutorID != e.id { + return + } + + if et.Stop { + e.stopTask(ctx, et) + } + + if et.Status.Phase == types.ExecutorTaskPhaseNotStarted { + e.executeTask(ctx, et) + } + + if et.Status.Phase == types.ExecutorTaskPhaseRunning { + _, 
ok := e.runningTasks.get(et.ID)
+		if !ok {
+			log.Infof("marking executor task %s as failed since there's no running task", et.ID)
+			et.Status.Phase = types.ExecutorTaskPhaseFailed
+			// mark in-progress steps as failed too
+			for _, s := range et.Status.Steps {
+				if s.Phase == types.ExecutorTaskPhaseRunning {
+					s.Phase = types.ExecutorTaskPhaseFailed
+					s.EndTime = util.TimePtr(time.Now())
+				}
+			}
+			e.sendExecutorTaskStatus(ctx, et)
+		}
+	}
+}
+
+func (e *Executor) tasksDataCleanerLoop(ctx context.Context) {
+	for {
+		log.Debugf("tasksDataCleaner")
+
+		if err := e.tasksDataCleaner(ctx); err != nil {
+			log.Errorf("err: %+v", err)
+		}
+
+		select {
+		case <-ctx.Done():
+			return
+		default:
+		}
+
+		time.Sleep(2 * time.Second)
+	}
+}
+
+func (e *Executor) tasksDataCleaner(ctx context.Context) error {
+	entries, err := ioutil.ReadDir(e.tasksDir())
+	if err != nil {
+		return err
+	}
+
+	for _, entry := range entries {
+		if !entry.IsDir() {
+			continue
+		}
+		etID := filepath.Base(entry.Name())
+
+		_, resp, err := e.runserviceClient.GetExecutorTask(ctx, e.id, etID)
+		if err != nil {
+			if resp == nil {
+				return err
+			}
+			if resp.StatusCode != http.StatusNotFound {
+				return err
+			}
+		}
+		if resp.StatusCode == http.StatusNotFound {
+			taskDir := filepath.Join(e.tasksDir(), entry.Name())
+			log.Infof("removing task dir %q", taskDir)
+			// remove task dir
+			if err := os.RemoveAll(taskDir); err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
+type runningTasks struct {
+	tasks map[string]*runningTask
+	m     sync.Mutex
+}
+
+type runningTask struct {
+	sync.Mutex
+
+	et  *types.ExecutorTask
+	pod driver.Pod
+}
+
+func (r *runningTasks) get(rtID string) (*runningTask, bool) {
+	r.m.Lock()
+	defer r.m.Unlock()
+	rt, ok := r.tasks[rtID]
+	return rt, ok
+}
+
+func (r *runningTasks) addIfNotExists(rtID string, rt *runningTask) bool {
+	r.m.Lock()
+	defer r.m.Unlock()
+	if _, ok := r.tasks[rtID]; ok {
+		return false
+	}
+	r.tasks[rtID] = rt
+	return true
+}
+
+func (r *runningTasks) add(rtID string, rt *runningTask) {
+	r.m.Lock()
+	defer r.m.Unlock()
+	r.tasks[rtID] = rt
+}
+
+func (r *runningTasks) delete(rtID string) {
+	r.m.Lock()
+	defer r.m.Unlock()
+	delete(r.tasks, rtID)
+}
+
+func (e *Executor) handleTasks(ctx context.Context, c <-chan *types.ExecutorTask) {
+	for et := range c {
+		go e.executeTask(ctx, et)
+	}
+}
+
+func (e *Executor) getExecutorID() (string, error) {
+	id, err := ioutil.ReadFile(e.executorIDPath())
+	if err != nil && !os.IsNotExist(err) {
+		return "", err
+	}
+	return string(id), nil
+}
+
+func (e *Executor) saveExecutorID(id string) error {
+	if err := common.WriteFileAtomic(e.executorIDPath(), []byte(id), 0660); err != nil {
+		return errors.Wrapf(err, "failed to write executor id file")
+	}
+	return nil
+}
+
+type Executor struct {
+	c                *config.RunServiceExecutor
+	runserviceClient *rsapi.Client
+	id               string
+	runningTasks     *runningTasks
+	driver           driver.Driver
+	listenURL        string
+}
+
+func NewExecutor(c *config.RunServiceExecutor) (*Executor, error) {
+	if c.Debug {
+		level.SetLevel(zapcore.DebugLevel)
+	}
+
+	// look up the toolbox in PATH before resolving the absolute path: calling
+	// filepath.Abs("") would return the working directory and hide an unset path
+	if c.ToolboxPath == "" {
+		path, err := exec.LookPath("agola-toolbox")
+		if err != nil {
+			return nil, errors.Errorf("cannot find \"agola-toolbox\" binaries in PATH, agola-toolbox path must be explicitly provided")
+		}
+		c.ToolboxPath = path
+	}
+	toolboxPath, err := filepath.Abs(c.ToolboxPath)
+	if err != nil {
+		return nil, errors.Wrapf(err, "cannot find \"agola-toolbox\" absolute path")
+	}
+	c.ToolboxPath = toolboxPath
+
+	dockerDriver, err := driver.NewDockerDriver(logger, "/tmp/agola/bin")
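+	// "/tmp/agola/bin" above is the host dir that receives the toolbox binaries
+	// (see CopyToolbox) and is then bind mounted as the init volume of every
+	// pod this executor creates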
if err != nil { + return nil, errors.Wrapf(err, "failed to create docker client") + } + + e := &Executor{ + c: c, + runserviceClient: rsapi.NewClient(c.RunServiceURL), + driver: dockerDriver, + runningTasks: &runningTasks{ + tasks: make(map[string]*runningTask), + }, + } + + if err := os.MkdirAll(e.tasksDir(), 0770); err != nil { + return nil, err + } + + id, err := e.getExecutorID() + if err != nil { + return nil, err + } + if id == "" { + id = uuid.NewV4().String() + if err := e.saveExecutorID(id); err != nil { + return nil, err + } + } + + e.id = id + + addr, err := sockaddr.GetPrivateIP() + if err != nil { + return nil, errors.Wrapf(err, "cannot discover executor listen address") + } + if addr == "" { + return nil, errors.Errorf("cannot discover executor listen address") + } + u := url.URL{Scheme: "http"} + if c.Web.TLS { + u.Scheme = "https" + } + _, port, err := net.SplitHostPort(c.Web.ListenAddress) + if err != nil { + return nil, errors.Wrapf(err, "cannot get web listen port") + } + u.Host = net.JoinHostPort(addr, port) + e.listenURL = u.String() + + return e, nil +} + +func (e *Executor) Run(ctx context.Context) error { + if err := e.driver.(*driver.DockerDriver).CopyToolbox(context.TODO(), e.c.ToolboxPath); err != nil { + return err + } + + ch := make(chan *types.ExecutorTask) + schedulerHandler := NewTaskSubmissionHandler(ch) + logsHandler := NewLogsHandler(logger, e) + archivesHandler := NewArchivesHandler(e) + + router := mux.NewRouter() + apirouter := router.PathPrefix("/api/v1alpha").Subrouter() + + apirouter.Handle("/executor", schedulerHandler).Methods("POST") + apirouter.Handle("/executor/logs", logsHandler).Methods("GET") + apirouter.Handle("/executor/archives", archivesHandler).Methods("GET") + + go e.executorStatusSenderLoop(ctx) + go e.podsCleanerLoop(ctx) + go e.tasksCleanerLoop(ctx) + go e.tasksDataCleanerLoop(ctx) + + go e.handleTasks(ctx, ch) + + httpServer := http.Server{ + Addr: e.c.Web.ListenAddress, + Handler: apirouter, + } + lerrCh := make(chan error) + go func() { + lerrCh <- httpServer.ListenAndServe() + }() + + select { + case <-ctx.Done(): + log.Infof("runservice executor exiting") + httpServer.Close() + return nil + case err := <-lerrCh: + log.Errorf("http server listen error: %v", err) + return err + } +} diff --git a/internal/services/runservice/scheduler/api/api.go b/internal/services/runservice/scheduler/api/api.go new file mode 100644 index 0000000..6777e3d --- /dev/null +++ b/internal/services/runservice/scheduler/api/api.go @@ -0,0 +1,569 @@ +// Copyright 2019 Sorint.lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied +// See the License for the specific language governing permissions and +// limitations under the License. 
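For reference, hosting the executor defined above takes little more than building the config and running it until the context is cancelled. This is a sketch, not the service's actual main; the config fields are abbreviated and the values are placeholders.

package main

import (
	"context"
	"log"

	"github.com/sorintlab/agola/internal/services/config"
	"github.com/sorintlab/agola/internal/services/runservice/executor"
)

func main() {
	c := &config.RunServiceExecutor{
		// DataDir, ToolboxPath, RunServiceURL, Web.ListenAddress, ...
	}
	e, err := executor.NewExecutor(c)
	if err != nil {
		log.Fatalf("err: %v", err)
	}
	// Run blocks, serving the executor API and its background loops
	// (status sender, pods cleaner, tasks cleaner) until ctx is done
	if err := e.Run(context.Background()); err != nil {
		log.Fatalf("err: %v", err)
	}
}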
+ +package api + +import ( + "bufio" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strconv" + + "github.com/sorintlab/agola/internal/db" + "github.com/sorintlab/agola/internal/etcd" + "github.com/sorintlab/agola/internal/objectstorage" + "github.com/sorintlab/agola/internal/services/runservice/scheduler/command" + "github.com/sorintlab/agola/internal/services/runservice/scheduler/common" + "github.com/sorintlab/agola/internal/services/runservice/scheduler/readdb" + "github.com/sorintlab/agola/internal/services/runservice/scheduler/store" + "github.com/sorintlab/agola/internal/services/runservice/types" + "github.com/sorintlab/agola/internal/wal" + + "github.com/gorilla/mux" + "github.com/pkg/errors" + "go.uber.org/zap" +) + +type LogsHandler struct { + log *zap.SugaredLogger + e *etcd.Store + lts *objectstorage.ObjStorage + wal *wal.WalManager +} + +func NewLogsHandler(logger *zap.Logger, e *etcd.Store, lts *objectstorage.ObjStorage, wal *wal.WalManager) *LogsHandler { + return &LogsHandler{ + log: logger.Sugar(), + e: e, + lts: lts, + wal: wal, + } +} + +func (h *LogsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + // TODO(sgotti) Check authorized call from client + + runID := r.URL.Query().Get("runid") + if runID == "" { + http.Error(w, "", http.StatusBadRequest) + return + } + taskID := r.URL.Query().Get("taskid") + if taskID == "" { + http.Error(w, "", http.StatusBadRequest) + return + } + s := r.URL.Query().Get("step") + if s == "" { + http.Error(w, "", http.StatusBadRequest) + return + } + step, err := strconv.Atoi(s) + if err != nil { + http.Error(w, "", http.StatusBadRequest) + return + } + follow := false + if _, ok := r.URL.Query()["follow"]; ok { + follow = true + } + stream := false + if _, ok := r.URL.Query()["stream"]; ok { + stream = true + } + if follow { + stream = true + } + + if err, sendError := h.readTaskLogs(ctx, runID, taskID, step, w, follow, stream); err != nil { + h.log.Errorf("err: %+v", err) + if sendError { + switch err.(type) { + case common.ErrNotExist: + http.Error(w, err.Error(), http.StatusNotFound) + default: + http.Error(w, err.Error(), http.StatusInternalServerError) + } + } + } +} + +func (h *LogsHandler) readTaskLogs(ctx context.Context, runID, taskID string, step int, w http.ResponseWriter, follow, stream bool) (error, bool) { + r, err := store.GetRunEtcdOrLTS(ctx, h.e, h.wal, runID) + if err != nil { + return err, true + } + if r == nil { + return errors.Errorf("no such run with id: %s", runID), true + } + + task, ok := r.RunTasks[taskID] + if !ok { + return errors.Errorf("no such task with ID %s in run %s", taskID, runID), true + } + if len(task.Steps) <= step { + return errors.Errorf("no such step for task %s in run %s", taskID, runID), true + } + + // if the log has been already fetched use it, otherwise fetch it from the executor + if task.Steps[step].LogPhase == types.RunTaskFetchPhaseFinished { + logPath := store.LTSRunLogPath(task.ID, step) + f, err := h.lts.ReadObject(logPath) + if err != nil { + if err == objectstorage.ErrNotExist { + return common.NewErrNotExist(err), true + } + return err, true + } + defer f.Close() + return sendLogs(w, f, stream), false + } + + et, err := store.GetExecutorTask(ctx, h.e, task.ID) + if err != nil { + return err, true + } + executor, err := store.GetExecutor(ctx, h.e, et.Status.ExecutorID) + if err != nil && err != etcd.ErrKeyNotFound { + return err, true + } + if executor == nil { + return common.NewErrNotExist(errors.Errorf("executor with id %q doesn't 
exist", et.Status.ExecutorID)), true + } + + url := fmt.Sprintf("%s/api/v1alpha/executor/logs?taskid=%s&step=%d", executor.ListenURL, taskID, step) + if follow { + url += "&follow" + } + req, err := http.Get(url) + if err != nil { + return err, true + } + defer req.Body.Close() + if req.StatusCode != http.StatusOK { + if req.StatusCode == http.StatusNotFound { + return common.NewErrNotExist(errors.New("no log on executor")), true + } + return errors.Errorf("received http status: %d", req.StatusCode), true + } + + return sendLogs(w, req.Body, stream), false +} + +func sendLogs(w http.ResponseWriter, r io.Reader, stream bool) error { + if stream { + w.Header().Set("Content-Type", "text/event-stream") + } + + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + + br := bufio.NewReader(r) + + var flusher http.Flusher + if fl, ok := w.(http.Flusher); ok { + flusher = fl + } + stop := false + for { + if stop { + return nil + } + data, err := br.ReadBytes('\n') + if err != nil { + if err != io.EOF { + return err + } + if len(data) == 0 { + return nil + } + stop = true + } + if stream { + if _, err := w.Write([]byte(fmt.Sprintf("data: %s\n", data))); err != nil { + return err + } + } else { + if _, err := w.Write(data); err != nil { + return err + } + } + if flusher != nil { + flusher.Flush() + } + } +} + +type ChangeGroupsUpdateTokensHandler struct { + log *zap.SugaredLogger + readDB *readdb.ReadDB +} + +func NewChangeGroupsUpdateTokensHandler(logger *zap.Logger, readDB *readdb.ReadDB) *ChangeGroupsUpdateTokensHandler { + return &ChangeGroupsUpdateTokensHandler{ + log: logger.Sugar(), + readDB: readDB, + } +} + +func (h *ChangeGroupsUpdateTokensHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + query := r.URL.Query() + groups := query["changegroup"] + + var cgt *types.ChangeGroupsUpdateToken + + err := h.readDB.Do(func(tx *db.Tx) error { + var err error + cgt, err = h.readDB.GetChangeGroupsUpdateTokens(tx, groups) + return err + }) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + cgts, err := types.MarshalChangeGroupsUpdateToken(cgt) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + if err := json.NewEncoder(w).Encode(cgts); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } +} + +type RunResponse struct { + Run *types.Run `json:"run"` + RunConfig *types.RunConfig `json:"run_config"` +} + +type RunHandler struct { + log *zap.SugaredLogger + e *etcd.Store + wal *wal.WalManager + readDB *readdb.ReadDB +} + +func NewRunHandler(logger *zap.Logger, e *etcd.Store, wal *wal.WalManager, readDB *readdb.ReadDB) *RunHandler { + return &RunHandler{ + log: logger.Sugar(), + e: e, + wal: wal, + readDB: readDB, + } +} + +func (h *RunHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + vars := mux.Vars(r) + runID := vars["runid"] + + run, _, err := store.GetRun(ctx, h.e, runID) + if err != nil && err != etcd.ErrKeyNotFound { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + if run == nil { + run, err = store.LTSGetRun(h.wal, runID) + if err != nil && err != objectstorage.ErrNotExist { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + } + if run == nil { + http.Error(w, "", http.StatusNotFound) + return + } + + rc, err := store.LTSGetRunConfig(h.wal, run.ID) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + res 
:= &RunResponse{ + Run: run, + RunConfig: rc, + } + + if err := json.NewEncoder(w).Encode(res); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } +} + +const ( + DefaultRunsLimit = 25 + MaxRunsLimit = 40 +) + +type GetRunsResponse struct { + Runs []*types.Run `json:"runs"` + ChangeGroupsUpdateToken string `json:"change_groups_update_tokens"` +} + +type RunsHandler struct { + log *zap.SugaredLogger + readDB *readdb.ReadDB +} + +func NewRunsHandler(logger *zap.Logger, readDB *readdb.ReadDB) *RunsHandler { + return &RunsHandler{ + log: logger.Sugar(), + readDB: readDB, + } +} + +func (h *RunsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + query := r.URL.Query() + phaseFilter := types.RunPhaseFromStringSlice(query["phase"]) + + changeGroups := query["changegroup"] + groups := query["group"] + + limitS := query.Get("limit") + limit := DefaultRunsLimit + if limitS != "" { + var err error + limit, err = strconv.Atoi(limitS) + if err != nil { + http.Error(w, "", http.StatusBadRequest) + return + } + } + if limit < 0 { + http.Error(w, "limit must be greater or equal than 0", http.StatusBadRequest) + return + } + if limit > MaxRunsLimit { + limit = MaxRunsLimit + } + sortOrder := types.SortOrderDesc + if _, ok := query["asc"]; ok { + sortOrder = types.SortOrderAsc + } + + start := query.Get("start") + + var runs []*types.Run + var cgt *types.ChangeGroupsUpdateToken + + if len(groups) == 0 { + groups = []string{"."} + } + err := h.readDB.Do(func(tx *db.Tx) error { + if err := h.readDB.PrefetchRuns(tx, groups, phaseFilter, start, limit, sortOrder); err != nil { + h.log.Errorf("err: %+v", err) + return err + } + return nil + }) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + err = h.readDB.Do(func(tx *db.Tx) error { + var err error + runs, err = h.readDB.GetRuns(tx, groups, phaseFilter, start, limit, sortOrder) + if err != nil { + h.log.Errorf("err: %+v", err) + return err + } + + cgt, err = h.readDB.GetChangeGroupsUpdateTokens(tx, changeGroups) + return err + }) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + cgts, err := types.MarshalChangeGroupsUpdateToken(cgt) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + resp := &GetRunsResponse{ + Runs: runs, + ChangeGroupsUpdateToken: cgts, + } + if err := json.NewEncoder(w).Encode(resp); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } +} + +type RunCreateRequest struct { + RunConfig *types.RunConfig `json:"run_config"` + Group string `json:"group"` + Environment map[string]string `json:"environment"` + Annotations map[string]string `json:"annotations"` + ChangeGroupsUpdateToken string `json:"changeup_update_tokens"` +} + +type RunCreateHandler struct { + log *zap.SugaredLogger + ch *command.CommandHandler +} + +func NewRunCreateHandler(logger *zap.Logger, ch *command.CommandHandler) *RunCreateHandler { + return &RunCreateHandler{ + log: logger.Sugar(), + ch: ch, + } +} + +func (h *RunCreateHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + var req RunCreateRequest + d := json.NewDecoder(r.Body) + if err := d.Decode(&req); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + creq := &command.RunCreateRequest{ + RunConfig: req.RunConfig, + Group: req.Group, + Environment: req.Environment, + Annotations: req.Annotations, + ChangeGroupsUpdateToken: 
req.ChangeGroupsUpdateToken, + } + if err := h.ch.CreateRun(ctx, creq); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } +} + +type RunActionType string + +const ( + RunActionTypeChangePhase RunActionType = "changephase" +) + +type RunActionsRequest struct { + ActionType RunActionType `json:"action_type"` + + Phase types.RunPhase `json:"phase"` + ChangeGroupsUpdateToken string `json:"change_groups_update_tokens"` +} + +type RunActionsHandler struct { + log *zap.SugaredLogger + ch *command.CommandHandler + readDB *readdb.ReadDB +} + +func NewRunActionsHandler(logger *zap.Logger, ch *command.CommandHandler) *RunActionsHandler { + return &RunActionsHandler{ + log: logger.Sugar(), + ch: ch, + } +} + +func (h *RunActionsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + vars := mux.Vars(r) + runID := vars["runid"] + + // TODO(sgotti) Check authorized call from client + var req RunActionsRequest + d := json.NewDecoder(r.Body) + if err := d.Decode(&req); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + switch req.ActionType { + case RunActionTypeChangePhase: + creq := &command.RunChangePhaseRequest{ + RunID: runID, + Phase: req.Phase, + ChangeGroupsUpdateToken: req.ChangeGroupsUpdateToken, + } + if err := h.ch.ChangeRunPhase(ctx, creq); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + default: + http.Error(w, "", http.StatusBadRequest) + return + } +} + +type RunTaskActionType string + +const ( + RunTaskActionTypeApprove RunTaskActionType = "approve" +) + +type RunTaskActionsRequest struct { + ActionType RunTaskActionType `json:"action_type"` + ApprovalAnnotations map[string]string `json:"approval_annotations,omitempty"` + ChangeGroupsUpdateToken string `json:"change_groups_update_tokens"` +} + +type RunTaskActionsHandler struct { + log *zap.SugaredLogger + ch *command.CommandHandler + readDB *readdb.ReadDB +} + +func NewRunTaskActionsHandler(logger *zap.Logger, ch *command.CommandHandler) *RunTaskActionsHandler { + return &RunTaskActionsHandler{ + log: logger.Sugar(), + ch: ch, + } +} + +func (h *RunTaskActionsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + vars := mux.Vars(r) + runID := vars["runid"] + taskID := vars["taskid"] + + // TODO(sgotti) Check authorized call from client + var req RunTaskActionsRequest + d := json.NewDecoder(r.Body) + if err := d.Decode(&req); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + switch req.ActionType { + case RunTaskActionTypeApprove: + creq := &command.RunTaskApproveRequest{ + RunID: runID, + TaskID: taskID, + ChangeGroupsUpdateToken: req.ChangeGroupsUpdateToken, + } + if err := h.ch.ApproveRunTask(ctx, creq); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + default: + http.Error(w, "", http.StatusBadRequest) + return + } +} diff --git a/internal/services/runservice/scheduler/api/client.go b/internal/services/runservice/scheduler/api/client.go new file mode 100644 index 0000000..dc05e63 --- /dev/null +++ b/internal/services/runservice/scheduler/api/client.go @@ -0,0 +1,252 @@ +// Copyright 2019 Sorint.lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package api
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"net/url"
+	"strconv"
+	"strings"
+
+	"github.com/pkg/errors"
+	rstypes "github.com/sorintlab/agola/internal/services/runservice/types"
+)
+
+var jsonContent = http.Header{"content-type": []string{"application/json"}}
+
+// Client is a runservice scheduler API client.
+type Client struct {
+	url    string
+	client *http.Client
+}
+
+// NewClient initializes and returns an API client.
+func NewClient(url string) *Client {
+	return &Client{
+		url:    strings.TrimSuffix(url, "/"),
+		client: &http.Client{},
+	}
+}
+
+// SetHTTPClient replaces the default http.Client with a user-provided one.
+func (c *Client) SetHTTPClient(client *http.Client) {
+	c.client = client
+}
+
+func (c *Client) doRequest(ctx context.Context, method, path string, query url.Values, header http.Header, ibody io.Reader) (*http.Response, error) {
+	u, err := url.Parse(c.url + "/api/v1alpha" + path)
+	if err != nil {
+		return nil, err
+	}
+	u.RawQuery = query.Encode()
+
+	req, err := http.NewRequest(method, u.String(), ibody)
+	if err != nil {
+		return nil, err
+	}
+	req = req.WithContext(ctx)
+	for k, v := range header {
+		req.Header[k] = v
+	}
+
+	return c.client.Do(req)
+}
+
+func (c *Client) getResponse(ctx context.Context, method, path string, query url.Values, header http.Header, ibody io.Reader) (*http.Response, error) {
+	resp, err := c.doRequest(ctx, method, path, query, header, ibody)
+	if err != nil {
+		return nil, err
+	}
+
+	if resp.StatusCode/100 != 2 {
+		defer resp.Body.Close()
+		data, err := ioutil.ReadAll(resp.Body)
+		if err != nil {
+			return nil, err
+		}
+
+		if len(data) <= 1 {
+			return resp, errors.New(resp.Status)
+		}
+
+		// TODO(sgotti) use a json error response
+
+		return resp, errors.New(string(data))
+	}
+
+	return resp, nil
+}
+
+func (c *Client) getParsedResponse(ctx context.Context, method, path string, query url.Values, header http.Header, ibody io.Reader, obj interface{}) (*http.Response, error) {
+	resp, err := c.getResponse(ctx, method, path, query, header, ibody)
+	if err != nil {
+		return resp, err
+	}
+	defer resp.Body.Close()
+
+	d := json.NewDecoder(resp.Body)
+
+	return resp, d.Decode(obj)
+}
+
+func (c *Client) SendExecutorStatus(ctx context.Context, executor *rstypes.Executor) (*http.Response, error) {
+	executorj, err := json.Marshal(executor)
+	if err != nil {
+		return nil, err
+	}
+	return c.getResponse(ctx, "POST", fmt.Sprintf("/executor/%s", executor.ID), nil, jsonContent, bytes.NewReader(executorj))
+}
+
+func (c *Client) SendExecutorTaskStatus(ctx context.Context, executorID string, et *rstypes.ExecutorTask) (*http.Response, error) {
+	etj, err := json.Marshal(et)
+	if err != nil {
+		return nil, err
+	}
+	return c.getResponse(ctx, "POST", fmt.Sprintf("/executor/%s/tasks/%s", executorID, et.ID), nil, jsonContent, bytes.NewReader(etj))
+}
+
+func (c *Client) GetExecutorTask(ctx context.Context, executorID, etID string) (*rstypes.ExecutorTask, *http.Response, error) {
+	et := new(rstypes.ExecutorTask)
+	resp, err := c.getParsedResponse(ctx, "GET",
fmt.Sprintf("/executor/%s/tasks/%s", executorID, etID), nil, jsonContent, nil, et) + return et, resp, err +} + +func (c *Client) GetExecutorTasks(ctx context.Context, executorID string) ([]*rstypes.ExecutorTask, *http.Response, error) { + ets := []*rstypes.ExecutorTask{} + resp, err := c.getParsedResponse(ctx, "GET", fmt.Sprintf("/executor/%s/tasks", executorID), nil, jsonContent, nil, &ets) + return ets, resp, err +} + +func (c *Client) GetArchive(ctx context.Context, taskID string, step int) (*http.Response, error) { + q := url.Values{} + q.Add("taskid", taskID) + q.Add("step", strconv.Itoa(step)) + + return c.getResponse(ctx, "GET", "/executor/archives", q, nil, nil) +} + +func (c *Client) GetRuns(ctx context.Context, phaseFilter, groups, changeGroups []string, start string, limit int, asc bool) (*GetRunsResponse, *http.Response, error) { + q := url.Values{} + for _, phase := range phaseFilter { + q.Add("phase", phase) + } + for _, group := range groups { + q.Add("group", group) + } + for _, changeGroup := range changeGroups { + q.Add("changegroup", changeGroup) + } + if start != "" { + q.Add("start", start) + } + if limit > 0 { + q.Add("limit", strconv.Itoa(limit)) + } + if asc { + q.Add("asc", "") + } + + getRunsResponse := new(GetRunsResponse) + resp, err := c.getParsedResponse(ctx, "GET", "/runs", q, jsonContent, nil, getRunsResponse) + return getRunsResponse, resp, err +} + +func (c *Client) GetQueuedRuns(ctx context.Context, start string, limit int) (*GetRunsResponse, *http.Response, error) { + return c.GetRuns(ctx, []string{"queued"}, []string{"."}, nil, start, limit, true) +} + +func (c *Client) GetGroupQueuedRuns(ctx context.Context, group string, limit int, changeGroups []string) (*GetRunsResponse, *http.Response, error) { + return c.GetRuns(ctx, []string{"queued"}, []string{group}, changeGroups, "", limit, false) +} + +func (c *Client) GetGroupRunningRuns(ctx context.Context, group string, limit int, changeGroups []string) (*GetRunsResponse, *http.Response, error) { + return c.GetRuns(ctx, []string{"running"}, []string{group}, changeGroups, "", limit, false) +} + +func (c *Client) GetGroupFirstQueuedRuns(ctx context.Context, group string, changeGroups []string) (*GetRunsResponse, *http.Response, error) { + return c.GetRuns(ctx, []string{"queued"}, []string{group}, changeGroups, "", 1, true) +} + +func (c *Client) CreateRun(ctx context.Context, req *RunCreateRequest) (*http.Response, error) { + reqj, err := json.Marshal(req) + if err != nil { + return nil, err + } + + return c.getResponse(ctx, "PUT", "/runs", nil, jsonContent, bytes.NewReader(reqj)) +} + +func (c *Client) RunActions(ctx context.Context, runID string, req *RunActionsRequest) (*http.Response, error) { + reqj, err := json.Marshal(req) + if err != nil { + return nil, err + } + return c.getResponse(ctx, "POST", fmt.Sprintf("/runs/%s/actions", runID), nil, jsonContent, bytes.NewReader(reqj)) +} + +func (c *Client) StartRun(ctx context.Context, runID string, changeGroupsUpdateToken string) (*http.Response, error) { + req := &RunActionsRequest{ + ActionType: RunActionTypeChangePhase, + Phase: rstypes.RunPhaseRunning, + ChangeGroupsUpdateToken: changeGroupsUpdateToken, + } + + return c.RunActions(ctx, runID, req) +} + +func (c *Client) RunTaskActions(ctx context.Context, runID, taskID string, req *RunTaskActionsRequest) (*http.Response, error) { + reqj, err := json.Marshal(req) + if err != nil { + return nil, err + } + return c.getResponse(ctx, "POST", fmt.Sprintf("/runs/%s/tasks/%s/actions", runID, taskID), nil, 
jsonContent, bytes.NewReader(reqj)) +} + +func (c *Client) ApproveRunTask(ctx context.Context, runID, taskID string, approvalAnnotations map[string]string, changeGroupsUpdateToken string) (*http.Response, error) { + req := &RunTaskActionsRequest{ + ActionType: RunTaskActionTypeApprove, + ApprovalAnnotations: approvalAnnotations, + ChangeGroupsUpdateToken: changeGroupsUpdateToken, + } + + return c.RunTaskActions(ctx, runID, taskID, req) +} + +func (c *Client) GetRun(ctx context.Context, runID string) (*RunResponse, *http.Response, error) { + runResponse := new(RunResponse) + resp, err := c.getParsedResponse(ctx, "GET", fmt.Sprintf("/runs/%s", runID), nil, jsonContent, nil, runResponse) + return runResponse, resp, err +} + +func (c *Client) GetLogs(ctx context.Context, runID, taskID string, step int, follow, stream bool) (*http.Response, error) { + q := url.Values{} + q.Add("runid", runID) + q.Add("taskid", taskID) + q.Add("step", strconv.Itoa(step)) + if follow { + q.Add("follow", "") + } + if stream { + q.Add("stream", "") + } + + return c.getResponse(ctx, "GET", "/logs", q, nil, nil) +} diff --git a/internal/services/runservice/scheduler/api/executor.go b/internal/services/runservice/scheduler/api/executor.go new file mode 100644 index 0000000..a6ebb40 --- /dev/null +++ b/internal/services/runservice/scheduler/api/executor.go @@ -0,0 +1,247 @@ +// Copyright 2019 Sorint.lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied +// See the License for the specific language governing permissions and +// limitations under the License. 
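+//
+// This file contains the executor facing API handlers: executors use them to
+// register themselves, report executor task status and fetch their assigned
+// tasks; the archives handler serves stored workspace archives.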
+ +package api + +import ( + "bufio" + "encoding/json" + "io" + "net/http" + "strconv" + + "github.com/gorilla/mux" + "github.com/sorintlab/agola/internal/etcd" + "github.com/sorintlab/agola/internal/objectstorage" + "github.com/sorintlab/agola/internal/services/runservice/scheduler/command" + "github.com/sorintlab/agola/internal/services/runservice/scheduler/common" + "github.com/sorintlab/agola/internal/services/runservice/scheduler/store" + "github.com/sorintlab/agola/internal/services/runservice/types" + "go.uber.org/zap" +) + +type ExecutorStatusHandler struct { + e *etcd.Store + c chan<- *types.ExecutorTask +} + +func NewExecutorStatusHandler(e *etcd.Store, c chan<- *types.ExecutorTask) *ExecutorStatusHandler { + return &ExecutorStatusHandler{e: e, c: c} +} + +func (h *ExecutorStatusHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + // TODO(sgotti) Check authorized call from executors + var executor *types.Executor + d := json.NewDecoder(r.Body) + defer r.Body.Close() + + if err := d.Decode(&executor); err != nil { + http.Error(w, "", http.StatusBadRequest) + return + } + + if _, err := store.PutExecutor(ctx, h.e, executor); err != nil { + http.Error(w, "", http.StatusInternalServerError) + return + } +} + +type ExecutorTaskStatusHandler struct { + e *etcd.Store + c chan<- *types.ExecutorTask +} + +func NewExecutorTaskStatusHandler(e *etcd.Store, c chan<- *types.ExecutorTask) *ExecutorTaskStatusHandler { + return &ExecutorTaskStatusHandler{e: e, c: c} +} + +func (h *ExecutorTaskStatusHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + // TODO(sgotti) Check authorized call from executors + var et *types.ExecutorTask + d := json.NewDecoder(r.Body) + defer r.Body.Close() + + if err := d.Decode(&et); err != nil { + http.Error(w, "", http.StatusBadRequest) + return + } + + if _, err := store.UpdateExecutorTaskStatus(ctx, h.e, et); err != nil { + http.Error(w, "", http.StatusBadRequest) + return + } + + go func() { h.c <- et }() +} + +type ExecutorTaskHandler struct { + e *etcd.Store +} + +func NewExecutorTaskHandler(e *etcd.Store) *ExecutorTaskHandler { + return &ExecutorTaskHandler{e: e} +} + +func (h *ExecutorTaskHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + vars := mux.Vars(r) + + // TODO(sgotti) Check authorized call from executors + etID := vars["taskid"] + if etID == "" { + http.Error(w, "", http.StatusBadRequest) + return + } + + et, err := store.GetExecutorTask(ctx, h.e, etID) + if err != nil && err != etcd.ErrKeyNotFound { + http.Error(w, "", http.StatusInternalServerError) + return + } + if et == nil { + http.Error(w, "", http.StatusNotFound) + return + } + + if err := json.NewEncoder(w).Encode(et); err != nil { + http.Error(w, "", http.StatusInternalServerError) + return + } +} + +type ExecutorTasksHandler struct { + e *etcd.Store +} + +func NewExecutorTasksHandler(e *etcd.Store) *ExecutorTasksHandler { + return &ExecutorTasksHandler{e: e} +} + +func (h *ExecutorTasksHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + vars := mux.Vars(r) + + // TODO(sgotti) Check authorized call from executors + executorID := vars["executorid"] + if executorID == "" { + http.Error(w, "", http.StatusBadRequest) + return + } + + ets, err := store.GetExecutorTasks(ctx, h.e, executorID) + if err != nil { + http.Error(w, "", http.StatusInternalServerError) + return + } + + if err := json.NewEncoder(w).Encode(ets); err != nil { + http.Error(w, "", 
http.StatusInternalServerError) + return + } +} + +type ArchivesHandler struct { + log *zap.SugaredLogger + lts *objectstorage.ObjStorage +} + +func NewArchivesHandler(logger *zap.Logger, lts *objectstorage.ObjStorage) *ArchivesHandler { + return &ArchivesHandler{ + log: logger.Sugar(), + lts: lts, + } +} + +func (h *ArchivesHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + // TODO(sgotti) Check authorized call from scheduler + + taskID := r.URL.Query().Get("taskid") + if taskID == "" { + http.Error(w, "", http.StatusBadRequest) + return + } + s := r.URL.Query().Get("step") + if s == "" { + http.Error(w, "", http.StatusBadRequest) + return + } + step, err := strconv.Atoi(s) + if err != nil { + http.Error(w, "", http.StatusBadRequest) + return + } + + w.Header().Set("Cache-Control", "no-cache") + + if err := h.readArchive(taskID, step, w); err != nil { + switch err.(type) { + case common.ErrNotExist: + http.Error(w, err.Error(), http.StatusNotFound) + default: + http.Error(w, err.Error(), http.StatusInternalServerError) + } + return + } +} + +func (h *ArchivesHandler) readArchive(rtID string, step int, w io.Writer) error { + archivePath := store.LTSRunArchivePath(rtID, step) + f, err := h.lts.ReadObject(archivePath) + if err != nil { + if err == objectstorage.ErrNotExist { + return common.NewErrNotExist(err) + } + return err + } + defer f.Close() + + br := bufio.NewReader(f) + + _, err = io.Copy(w, br) + return err +} + +type ExecutorDeleteHandler struct { + log *zap.SugaredLogger + ch *command.CommandHandler +} + +func NewExecutorDeleteHandler(logger *zap.Logger, ch *command.CommandHandler) *ExecutorDeleteHandler { + return &ExecutorDeleteHandler{ + log: logger.Sugar(), + ch: ch, + } +} + +func (h *ExecutorDeleteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + vars := mux.Vars(r) + + // TODO(sgotti) Check authorized call from executors + executorID := vars["executorid"] + if executorID == "" { + http.Error(w, "", http.StatusBadRequest) + return + } + + if err := h.ch.DeleteExecutor(ctx, executorID); err != nil { + http.Error(w, "", http.StatusInternalServerError) + return + } +} diff --git a/internal/services/runservice/scheduler/command/command.go b/internal/services/runservice/scheduler/command/command.go new file mode 100644 index 0000000..cdd0371 --- /dev/null +++ b/internal/services/runservice/scheduler/command/command.go @@ -0,0 +1,277 @@ +// Copyright 2019 Sorint.lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied +// See the License for the specific language governing permissions and +// limitations under the License. 
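+//
+// The command package implements the run service write operations: run
+// creation, run phase changes, task approval and executor deletion. Active
+// run state is kept in etcd, while run configs and run data are persisted to
+// the object storage through the wal.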
+ +package command + +import ( + "context" + "time" + + "github.com/sorintlab/agola/internal/etcd" + "github.com/sorintlab/agola/internal/objectstorage" + "github.com/sorintlab/agola/internal/runconfig" + "github.com/sorintlab/agola/internal/sequence" + "github.com/sorintlab/agola/internal/services/runservice/scheduler/common" + "github.com/sorintlab/agola/internal/services/runservice/scheduler/store" + "github.com/sorintlab/agola/internal/services/runservice/types" + "github.com/sorintlab/agola/internal/util" + "github.com/sorintlab/agola/internal/wal" + + "github.com/pkg/errors" + "go.uber.org/zap" +) + +type CommandHandler struct { + log *zap.SugaredLogger + e *etcd.Store + lts *objectstorage.ObjStorage + wal *wal.WalManager +} + +func NewCommandHandler(logger *zap.Logger, e *etcd.Store, lts *objectstorage.ObjStorage, wal *wal.WalManager) *CommandHandler { + return &CommandHandler{ + log: logger.Sugar(), + e: e, + lts: lts, + wal: wal, + } +} + +type RunChangePhaseRequest struct { + RunID string + Phase types.RunPhase + ChangeGroupsUpdateToken string +} + +func (s *CommandHandler) ChangeRunPhase(ctx context.Context, req *RunChangePhaseRequest) error { + cgt, err := types.UnmarshalChangeGroupsUpdateToken(req.ChangeGroupsUpdateToken) + if err != nil { + return err + } + + r, _, err := store.GetRun(ctx, s.e, req.RunID) + if err != nil { + return err + } + + switch req.Phase { + case types.RunPhaseRunning: + if r.Phase != types.RunPhaseQueued { + return errors.Errorf("run %s is not queued but in %q phase", r.ID, r.Phase) + } + r.ChangePhase(types.RunPhaseRunning) + } + + _, err = store.AtomicPutRun(ctx, s.e, r, "", cgt) + return err +} + +type RunCreateRequest struct { + RunConfig *types.RunConfig + Group string + Environment map[string]string + Annotations map[string]string + ChangeGroupsUpdateToken string +} + +func (s *CommandHandler) CreateRun(ctx context.Context, req *RunCreateRequest) error { + runcgt, err := types.UnmarshalChangeGroupsUpdateToken(req.ChangeGroupsUpdateToken) + if err != nil { + return err + } + + rc := req.RunConfig + + // generate a new run sequence that will be the same for the run, runconfig and rundata + seq, err := sequence.IncSequence(ctx, s.e, common.EtcdRunSequenceKey) + if err != nil { + return err + } + id := seq.String() + + // TODO(sgotti) validate run config + if err := runconfig.CheckRunConfig(rc); err != nil { + return err + } + + // set the run config ID + rc.ID = id + + // generate tasks levels + if err := runconfig.GenTasksLevels(rc); err != nil { + return err + } + + rd := &types.RunData{ + ID: id, + Group: req.Group, + Environment: req.Environment, + Annotations: req.Annotations, + } + + run, err := s.genRun(ctx, rc, rd) + if err != nil { + return err + } + s.log.Debugf("created run: %s", util.Dump(run)) + + c, cgt, err := store.LTSGetRunCounter(s.wal, run.Group) + s.log.Infof("c: %d, cgt: %s", c, util.Dump(cgt)) + if err != nil && err != objectstorage.ErrNotExist { + return err + } + c++ + run.Counter = c + + actions := []*wal.Action{} + + // persist group counter + rca, err := store.LTSUpdateRunCounterAction(ctx, c, run.Group) + if err != nil { + return err + } + actions = append(actions, rca) + + // persist run config + rca, err = store.LTSSaveRunConfigAction(rc) + if err != nil { + return err + } + actions = append(actions, rca) + + // persist run data + rda, err := store.LTSSaveRunDataAction(rd) + if err != nil { + return err + } + actions = append(actions, rda) + + if _, err = s.wal.WriteWal(ctx, actions, cgt); err != nil { + return err + } 
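+
+	// the wal write above persisted the group counter, run config and run
+	// data to the object storage; the run only becomes visible (and queued)
+	// with the atomic put in etcd below, guarded by the caller's change
+	// groups update token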
+	if _, err := store.AtomicPutRun(ctx, s.e, run, common.RunEventTypeQueued, runcgt); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (s *CommandHandler) genRunTask(ctx context.Context, rct *types.RunConfigTask) *types.RunTask {
+	rt := &types.RunTask{
+		ID:                rct.ID,
+		Status:            types.RunTaskStatusNotStarted,
+		Steps:             make([]*types.RunTaskStep, len(rct.Steps)),
+		WorkspaceArchives: []int{},
+	}
+	for i := range rt.Steps {
+		s := &types.RunTaskStep{
+			Phase:    types.ExecutorTaskPhaseNotStarted,
+			LogPhase: types.RunTaskFetchPhaseNotStarted,
+		}
+		rt.Steps[i] = s
+	}
+	for i, ps := range rct.Steps {
+		switch ps.(type) {
+		case *types.SaveToWorkspaceStep:
+			rt.WorkspaceArchives = append(rt.WorkspaceArchives, i)
+		}
+	}
+	rt.WorkspaceArchivesPhase = make([]types.RunTaskFetchPhase, len(rt.WorkspaceArchives))
+	for i := range rt.WorkspaceArchivesPhase {
+		rt.WorkspaceArchivesPhase[i] = types.RunTaskFetchPhaseNotStarted
+	}
+
+	return rt
+}
+
+func (s *CommandHandler) genRun(ctx context.Context, rc *types.RunConfig, rd *types.RunData) (*types.Run, error) {
+	r := &types.Run{
+		ID:          rc.ID,
+		Name:        rc.Name,
+		Group:       rd.Group,
+		Annotations: rd.Annotations,
+		Phase:       types.RunPhaseQueued,
+		Result:      types.RunResultUnknown,
+		RunTasks:    make(map[string]*types.RunTask),
+		EnqueueTime: util.TimePtr(time.Now()),
+	}
+
+	for _, rct := range rc.Tasks {
+		rt := s.genRunTask(ctx, rct)
+		r.RunTasks[rt.ID] = rt
+	}
+
+	return r, nil
+}
+
+type RunTaskApproveRequest struct {
+	RunID                   string
+	TaskID                  string
+	ApprovalAnnotations     map[string]string
+	ChangeGroupsUpdateToken string
+}
+
+func (s *CommandHandler) ApproveRunTask(ctx context.Context, req *RunTaskApproveRequest) error {
+	cgt, err := types.UnmarshalChangeGroupsUpdateToken(req.ChangeGroupsUpdateToken)
+	if err != nil {
+		return err
+	}
+
+	r, _, err := store.GetRun(ctx, s.e, req.RunID)
+	if err != nil {
+		return err
+	}
+
+	task, ok := r.RunTasks[req.TaskID]
+	if !ok {
+		return errors.Errorf("run %q doesn't have task %q", r.ID, req.TaskID)
+	}
+
+	if !task.WaitingApproval {
+		return errors.Errorf("run %q, task %q is not in waiting approval state", r.ID, req.TaskID)
+	}
+
+	if task.Approved {
+		return errors.Errorf("run %q, task %q is already approved", r.ID, req.TaskID)
+	}
+
+	task.Approved = true
+	task.ApprovalAnnotations = req.ApprovalAnnotations
+
+	_, err = store.AtomicPutRun(ctx, s.e, r, "", cgt)
+	return err
+}
+
+func (s *CommandHandler) DeleteExecutor(ctx context.Context, executorID string) error {
+	// mark all executor tasks as failed
+	ets, err := store.GetExecutorTasks(ctx, s.e, executorID)
+	if err != nil {
+		return err
+	}
+
+	for _, et := range ets {
+		et.Status.Phase = types.ExecutorTaskPhaseFailed
+		et.FailError = "executor deleted"
+		if _, err := store.AtomicPutExecutorTask(ctx, s.e, et); err != nil {
+			return err
+		}
+	}
+
+	// delete the executor
+	if err := store.DeleteExecutor(ctx, s.e, executorID); err != nil {
+		return err
+	}
+
+	return nil
+}
diff --git a/internal/services/runservice/scheduler/common/common.go b/internal/services/runservice/scheduler/common/common.go
new file mode 100644
index 0000000..874d6b0
--- /dev/null
+++ b/internal/services/runservice/scheduler/common/common.go
@@ -0,0 +1,114 @@
+// Copyright 2019 Sorint.lab
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied +// See the License for the specific language governing permissions and +// limitations under the License. + +package common + +import ( + "fmt" + "path" + "strings" +) + +type ErrNotExist struct { + err error +} + +func NewErrNotExist(err error) error { + return ErrNotExist{err: err} +} + +func (e ErrNotExist) Error() string { + return e.err.Error() +} + +var ( + EtcdRunsDir = "runs" + EtcdRunSequenceKey = "runsequence" + EtcdRunEventKey = "runevents" + EtcdRunEventSequenceKey = "runeventsequence" + + EtcdChangeGroupsDir = "changegroups" + EtcdChangeGroupMinRevisionKey = "changegroupsminrev" + + EtcdExecutorsDir = "executors" + EtcdTasksDir = "tasks" +) + +func EtcdRunKey(runID string) string { return path.Join(EtcdRunsDir, runID) } +func EtcdExecutorKey(taskID string) string { return path.Join(EtcdExecutorsDir, taskID) } +func EtcdTaskKey(taskID string) string { return path.Join(EtcdTasksDir, taskID) } + +const ( + EtcdChangeGroupMinRevisionRange = 100 +) + +var ( + StorageDataDir = "" + StorageRunsDir = path.Join(StorageDataDir, "runs") + StorageRunsDataDir = path.Join(StorageDataDir, "runsdata") + StorageRunsConfigDir = path.Join(StorageDataDir, "runsconfig") + StorageRunsIndexesDir = path.Join(StorageDataDir, "runsindexes") + StorageCountersDir = path.Join(StorageDataDir, "counters") +) + +const ( + etcdWalsMinRevisionRange = 100 +) + +func StorageRunFile(runID string) string { + return path.Join(StorageRunsDir, runID) +} + +func StorageRunDataFile(runID string) string { + return path.Join(StorageRunsDataDir, runID) +} + +func StorageRunConfigFile(runID string) string { + return path.Join(StorageRunsConfigDir, runID) +} + +func StorageCounterFile(group string) string { + return path.Join(StorageCountersDir, group) +} + +type ConfigType int + +const ( + ConfigTypeRun ConfigType = iota + 1 + ConfigTypeRunData + ConfigTypeRunConfig + ConfigTypeCounter +) + +func PathToTypeID(p string) (ConfigType, string) { + var configType ConfigType + switch path.Dir(p) { + case StorageRunsDir: + configType = ConfigTypeRun + case StorageRunsDataDir: + configType = ConfigTypeRunData + case StorageRunsConfigDir: + configType = ConfigTypeRunConfig + } + + if strings.HasPrefix(p, StorageCountersDir+"/") { + configType = ConfigTypeCounter + } + + if configType == 0 { + panic(fmt.Errorf("cannot determine configtype for path: %q", p)) + } + + return configType, path.Base(p) +} diff --git a/internal/services/runservice/scheduler/common/events.go b/internal/services/runservice/scheduler/common/events.go new file mode 100644 index 0000000..c0aa1be --- /dev/null +++ b/internal/services/runservice/scheduler/common/events.go @@ -0,0 +1,46 @@ +// Copyright 2019 Sorint.lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied +// See the License for the specific language governing permissions and +// limitations under the License. + +package common + +import ( + "context" + + "github.com/sorintlab/agola/internal/etcd" + "github.com/sorintlab/agola/internal/sequence" +) + +type RunEventType string + +const ( + RunEventTypeQueued RunEventType = "queued" + RunEventTypeCancelled RunEventType = "cancelled" + RunEventTypeRunning RunEventType = "running" + RunEventTypeSuccess RunEventType = "success" + RunEventTypeFailed RunEventType = "failed" +) + +type RunEvent struct { + Sequence string + EventType RunEventType + RunID string +} + +func NewRunEvent(ctx context.Context, e *etcd.Store, runEventType RunEventType, runID string) (*RunEvent, error) { + seq, err := sequence.IncSequence(ctx, e, EtcdRunEventSequenceKey) + if err != nil { + return nil, err + } + return &RunEvent{Sequence: seq.String(), EventType: runEventType, RunID: runID}, nil +} diff --git a/internal/services/runservice/scheduler/readdb/create.go b/internal/services/runservice/scheduler/readdb/create.go new file mode 100644 index 0000000..dd059c6 --- /dev/null +++ b/internal/services/runservice/scheduler/readdb/create.go @@ -0,0 +1,41 @@ +// Copyright 2019 Sorint.lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied +// See the License for the specific language governing permissions and +// limitations under the License. 
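+//
+// The statements below define the readdb sqlite schema: the run, rungroup,
+// runevent and changegrouprevision tables cache the current etcd state,
+// while the *_lts tables cache runs archived to the object storage.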
+ +package readdb + +var Stmts = []string{ + // last processed etcd event revision + //"create table revision (clusterid varchar, revision bigint, PRIMARY KEY(revision))", + "create table revision (revision bigint, PRIMARY KEY(revision))", + + "create table run (id varchar, data bytea, phase varchar, PRIMARY KEY (id))", + "create index run_phase on run(phase)", + + // rungroup stores the groups associated to a run + "create table rungroup (runid varchar, grouppath varchar, PRIMARY KEY (runid, grouppath), FOREIGN KEY(runid) REFERENCES run(id) ON DELETE CASCADE)", + "create index rungroup_grouppath on rungroup(grouppath)", + + "create table runevent (sequence varchar, data bytea, PRIMARY KEY (sequence))", + + // changegrouprevision stores the current revision of the changegroup for optimistic locking + "create table changegrouprevision (id varchar, revision varchar, PRIMARY KEY (id, revision))", + + // LTS + "create table run_lts (id varchar, data bytea, phase varchar, PRIMARY KEY (id))", + "create index run_lts_phase on run_lts(phase)", + + // rungroup stores the groups associated to a run + "create table rungroup_lts (runid varchar, grouppath varchar, PRIMARY KEY (runid, grouppath), FOREIGN KEY(runid) REFERENCES run_lts(id) ON DELETE CASCADE)", + "create index rungroup_lts_grouppath on rungroup_lts(grouppath)", +} diff --git a/internal/services/runservice/scheduler/readdb/readdb.go b/internal/services/runservice/scheduler/readdb/readdb.go new file mode 100644 index 0000000..eb75edf --- /dev/null +++ b/internal/services/runservice/scheduler/readdb/readdb.go @@ -0,0 +1,982 @@ +// Copyright 2019 Sorint.lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied +// See the License for the specific language governing permissions and +// limitations under the License. + +package readdb + +import ( + "context" + "database/sql" + "encoding/json" + "fmt" + "os" + "path" + "path/filepath" + "sort" + "strings" + "sync" + "time" + + "github.com/sorintlab/agola/internal/db" + "github.com/sorintlab/agola/internal/etcd" + "github.com/sorintlab/agola/internal/objectstorage" + "github.com/sorintlab/agola/internal/sequence" + "github.com/sorintlab/agola/internal/services/runservice/scheduler/common" + "github.com/sorintlab/agola/internal/services/runservice/scheduler/store" + "github.com/sorintlab/agola/internal/services/runservice/types" + "github.com/sorintlab/agola/internal/util" + "github.com/sorintlab/agola/internal/wal" + "go.uber.org/zap" + + sq "github.com/Masterminds/squirrel" + "github.com/pkg/errors" + etcdclientv3 "go.etcd.io/etcd/clientv3" + etcdclientv3rpc "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes" + "go.etcd.io/etcd/mvcc/mvccpb" +) + +const ( + MaxFetchSize = 25 +) + +var ( + // Use postgresql $ placeholder. It'll be converted to ? 
from the provided db functions + sb = sq.StatementBuilder.PlaceholderFormat(sq.Dollar) + + revisionSelect = sb.Select("revision").From("revision") + revisionInsert = sb.Insert("revision").Columns("revision") + + runSelect = sb.Select("data").From("run") + runInsert = sb.Insert("run").Columns("id", "data", "phase") + + rungroupSelect = sb.Select("runid", "grouppath").From("rungroup") + rungroupInsert = sb.Insert("rungroup").Columns("runid", "grouppath") + + runeventSelect = sb.Select("data").From("runevent") + runeventInsert = sb.Insert("runevent").Columns("sequence", "data") + + changegrouprevisionSelect = sb.Select("id, revision").From("changegrouprevision") + changegrouprevisionInsert = sb.Insert("changegrouprevision").Columns("id", "revision") + + runLTSSelect = sb.Select("id").From("run_lts") + runLTSInsert = sb.Insert("run_lts").Columns("id", "data", "phase") + + rungroupLTSSelect = sb.Select("runid", "grouppath").From("rungroup_lts") + rungroupLTSInsert = sb.Insert("rungroup_lts").Columns("runid", "grouppath") +) + +type ReadDB struct { + log *zap.SugaredLogger + dataDir string + e *etcd.Store + rdb *db.DB + wal *wal.WalManager + + Initialized bool + initMutex sync.Mutex +} + +func NewReadDB(ctx context.Context, logger *zap.Logger, dataDir string, e *etcd.Store, wal *wal.WalManager) (*ReadDB, error) { + if err := os.MkdirAll(dataDir, 0770); err != nil { + return nil, err + } + rdb, err := db.NewDB(db.Sqlite3, filepath.Join(dataDir, "db")) + if err != nil { + return nil, err + } + + // populate readdb + if err := rdb.Create(Stmts); err != nil { + return nil, err + } + + readDB := &ReadDB{ + log: logger.Sugar(), + e: e, + dataDir: dataDir, + wal: wal, + rdb: rdb, + } + + revision, err := readDB.GetRevision() + if err != nil { + return nil, err + } + + if revision == 0 { + if err := readDB.Initialize(ctx); err != nil { + return nil, err + } + } + + readDB.Initialized = true + + return readDB, nil +} + +// Initialize populates the readdb with the current etcd data and save the +// revision to then feed it with the etcd events +func (r *ReadDB) Initialize(ctx context.Context) error { + r.log.Infof("initialize") + r.rdb.Close() + + // drop rdb + if err := os.Remove(filepath.Join(r.dataDir, "db")); err != nil { + return err + } + + rdb, err := db.NewDB(db.Sqlite3, filepath.Join(r.dataDir, "db")) + if err != nil { + return err + } + + // populate readdb + if err := rdb.Create(Stmts); err != nil { + return err + } + + r.rdb = rdb + + // then sync the rdb + for { + if err := r.SyncRDB(ctx); err != nil { + r.log.Errorf("error syncing run db: %+v, retrying", err) + } else { + break + } + time.Sleep(2 * time.Second) + } + + r.Initialized = true + + return nil +} + +func (r *ReadDB) SyncRDB(ctx context.Context) error { + err := r.rdb.Do(func(tx *db.Tx) error { + // Do pagination to limit the number of keys per request + var revision int64 + key := common.EtcdRunsDir + + var continuation *etcd.ListPagedContinuation + for { + listResp, err := r.e.ListPaged(ctx, key, 0, 10, continuation) + if err != nil { + return err + } + resp := listResp.Resp + continuation = listResp.Continuation + r.log.Infof("continuation: %s", util.Dump(continuation)) + + if revision == 0 { + revision = resp.Header.Revision + } + + for _, kv := range resp.Kvs { + r.log.Infof("key: %s", kv.Key) + var run *types.Run + if err := json.Unmarshal(kv.Value, &run); err != nil { + return err + } + run.Revision = kv.ModRevision + + if err := insertRun(tx, run, kv.Value); err != nil { + return err + } + } + + if !listResp.HasMore { + 
break + } + } + + // use the same revision + key = common.EtcdChangeGroupsDir + continuation = nil + for { + listResp, err := r.e.ListPaged(ctx, key, revision, 10, continuation) + if err != nil { + return err + } + resp := listResp.Resp + continuation = listResp.Continuation + + for _, kv := range resp.Kvs { + changegroupID := path.Base(string(kv.Key)) + + if err := insertChangeGroupRevision(tx, changegroupID, kv.ModRevision); err != nil { + return err + } + } + + if err := insertRevision(tx, revision); err != nil { + return err + } + + if !listResp.HasMore { + break + } + } + + return nil + }) + + return err +} + +func (r *ReadDB) SyncLTSRuns(tx *db.Tx, groupID, startRunID string, limit int, sortOrder types.SortOrder) error { + doneCh := make(chan struct{}) + defer close(doneCh) + + //q, args, err := rungroupSelect.Where(sq.Eq{"grouppath": groupID}).Limit(1).ToSql() + //r.log.Debugf("q: %s, args: %s", q, util.Dump(args)) + //if err != nil { + // return errors.Wrap(err, "failed to build query") + //} + //hasRow := false + //err = tx.Do(func(tx *db.Tx) error { + // rows, err := tx.Query(q, args...) + // if err != nil { + // return err + // } + // defer rows.Close() + + // for rows.Next() { + // hasRow = true + // break + // } + // if err := rows.Err(); err != nil { + // return err + // } + // return nil + //}) + + //// this means that this rungroup is in sync + //if hasRow { + // return nil + //} + + insertfunc := func(runs []*types.Run) error { + for _, run := range runs { + if err := r.insertRunLTS(tx, run, []byte{}); err != nil { + return err + } + } + return nil + } + + runs := []*types.Run{} + count := 0 + var start string + if startRunID != "" { + start = store.LTSIndexRunIDOrderPath(groupID, startRunID, sortOrder) + } + for object := range r.wal.List(store.LTSIndexRunIDOrderDir(groupID, sortOrder), start, true, doneCh) { + //r.log.Infof("path: %q", object.Path) + if object.Err != nil { + if object.Err == objectstorage.ErrNotExist { + break + } + return object.Err + } + + runObj := common.StorageRunFile(path.Base(object.Path)) + f, _, err := r.wal.ReadObject(runObj, nil) + if err != nil && err != objectstorage.ErrNotExist { + return err + } + if err != objectstorage.ErrNotExist { + var run *types.Run + e := json.NewDecoder(f) + if err := e.Decode(&run); err != nil { + f.Close() + return err + } + f.Close() + + runs = append(runs, run) + } + + if count > 100 { + if err := insertfunc(runs); err != nil { + return err + } + count = 0 + runs = []*types.Run{} + } else { + count++ + } + if count > limit { + break + } + } + if err := insertfunc(runs); err != nil { + return err + } + + return nil +} + +func (r *ReadDB) Run(ctx context.Context) { + for { + if err := r.HandleEvents(ctx); err != nil { + r.log.Errorf("handleevents err: %+v", err) + } + if !r.Initialized { + r.Initialize(ctx) + } + + select { + case <-ctx.Done(): + r.log.Infof("readdb exiting") + return + default: + } + + time.Sleep(1 * time.Second) + } +} + +func (r *ReadDB) HandleEvents(ctx context.Context) error { + var revision int64 + var lastRuns []*types.Run + err := r.rdb.Do(func(tx *db.Tx) error { + var err error + revision, err = r.getRevision(tx) + if err != nil { + return err + } + lastRuns, err = r.GetActiveRuns(tx, nil, nil, "", 1, types.SortOrderDesc) + return err + }) + if err != nil { + return err + } + + runSequence, _, err := sequence.CurSequence(ctx, r.e, common.EtcdRunSequenceKey) + if err != nil { + return err + } + + var lastRun *types.Run + if len(lastRuns) > 0 { + lastRun = lastRuns[0] + } + if lastRun != 
nil { + if runSequence == nil { + r.Initialized = false + return errors.Errorf("no runsequence in etcd, reinitializing.") + } + + lastRunSequence, err := sequence.Parse(lastRun.ID) + if err != nil { + return err + } + // check that the run sequence epoch isn't different than the current one (this means etcd + // has been reset, or worst, restored from a backup or manually deleted) + if runSequence == nil || runSequence.Epoch != lastRunSequence.Epoch { + r.Initialized = false + return errors.Errorf("last run epoch %d is different than current epoch in etcd %d, reinitializing.", lastRunSequence.Epoch, runSequence.Epoch) + } + } + + wctx, cancel := context.WithCancel(ctx) + defer cancel() + wctx = etcdclientv3.WithRequireLeader(wctx) + wch := r.e.Watch(wctx, "", revision+1) + for wresp := range wch { + if wresp.Canceled { + err = wresp.Err() + if err == etcdclientv3rpc.ErrCompacted { + r.log.Errorf("required events already compacted, reinitializing readdb") + r.Initialized = false + } + return errors.Wrapf(err, "watch error") + } + + // a single transaction for every response (every response contains all the + // events happened in an etcd revision). + err = r.rdb.Do(func(tx *db.Tx) error { + + for _, ev := range wresp.Events { + if err := r.handleEvent(tx, ev, &wresp); err != nil { + return err + } + + if err := insertRevision(tx, ev.Kv.ModRevision); err != nil { + return err + } + } + return nil + }) + if err != nil { + return err + } + } + + return nil +} + +func (r *ReadDB) handleEvent(tx *db.Tx, ev *etcdclientv3.Event, wresp *etcdclientv3.WatchResponse) error { + r.log.Debugf("event: %s %q : %q\n", ev.Type, ev.Kv.Key, ev.Kv.Value) + key := string(ev.Kv.Key) + switch { + case strings.HasPrefix(key, common.EtcdRunsDir+"/"): + return r.handleRunEvent(tx, ev, wresp) + + case strings.HasPrefix(key, common.EtcdChangeGroupsDir+"/"): + return r.handleChangeGroupEvent(tx, ev, wresp) + + case key == common.EtcdRunEventKey: + return r.handleRunsEventEvent(tx, ev, wresp) + + default: + return nil + } +} + +func (r *ReadDB) handleRunEvent(tx *db.Tx, ev *etcdclientv3.Event, wresp *etcdclientv3.WatchResponse) error { + switch ev.Type { + case mvccpb.PUT: + var run *types.Run + if err := json.Unmarshal(ev.Kv.Value, &run); err != nil { + return errors.Wrap(err, "failed to unmarshal run") + } + run.Revision = ev.Kv.ModRevision + + return insertRun(tx, run, ev.Kv.Value) + + case mvccpb.DELETE: + runID := path.Base(string(ev.Kv.Key)) + + if _, err := tx.Exec("delete from run where id = $1", runID); err != nil { + return errors.Wrap(err, "failed to delete run") + } + + // Run has been deleted from etcd, this means that it was stored in the LTS + run, err := store.LTSGetRun(r.wal, runID) + if err != nil { + return err + } + + return r.insertRunLTS(tx, run, []byte{}) + } + + return nil +} + +func (r *ReadDB) handleRunsEventEvent(tx *db.Tx, ev *etcdclientv3.Event, wresp *etcdclientv3.WatchResponse) error { + switch ev.Type { + case mvccpb.PUT: + var runEvent *common.RunEvent + if err := json.Unmarshal(ev.Kv.Value, &runEvent); err != nil { + return errors.Wrap(err, "failed to unmarshal run") + } + // poor man insert or update that works because transaction isolation level is serializable + if _, err := tx.Exec("delete from runevent where sequence = $1", runEvent.Sequence); err != nil { + return errors.Wrap(err, "failed to delete run") + } + q, args, err := runeventInsert.Values(runEvent.Sequence, ev.Kv.Value).ToSql() + if err != nil { + return errors.Wrap(err, "failed to build query") + } + if _, err = 
tx.Exec(q, args...); err != nil { + return err + } + } + + return nil +} + +func (r *ReadDB) handleChangeGroupEvent(tx *db.Tx, ev *etcdclientv3.Event, wresp *etcdclientv3.WatchResponse) error { + changegroupID := path.Base(string(ev.Kv.Key)) + + switch ev.Type { + case mvccpb.PUT: + return insertChangeGroupRevision(tx, changegroupID, ev.Kv.ModRevision) + + case mvccpb.DELETE: + if _, err := tx.Exec("delete from changegrouprevision where id = $1", changegroupID); err != nil { + return errors.Wrap(err, "failed to delete change group revision") + } + } + + return nil +} + +func (r *ReadDB) Do(f func(tx *db.Tx) error) error { + return r.rdb.Do(f) +} + +func insertRevision(tx *db.Tx, revision int64) error { + // poor man insert or update that works because transaction isolation level is serializable + if _, err := tx.Exec("delete from revision"); err != nil { + return errors.Wrap(err, "failed to delete run") + } + // TODO(sgotti) go database/sql and mattn/sqlite3 don't support uint64 types... + //q, args, err = revisionInsert.Values(int64(wresp.Header.ClusterId), run.Revision).ToSql() + q, args, err := revisionInsert.Values(revision).ToSql() + if err != nil { + return errors.Wrap(err, "failed to build query") + } + if _, err = tx.Exec(q, args...); err != nil { + return errors.WithStack(err) + } + return nil +} + +func insertRun(tx *db.Tx, run *types.Run, data []byte) error { + // poor man insert or update that works because transaction isolation level is serializable + if _, err := tx.Exec("delete from run where id = $1", run.ID); err != nil { + return errors.Wrap(err, "failed to delete run") + } + q, args, err := runInsert.Values(run.ID, data, run.Phase).ToSql() + if err != nil { + return errors.Wrap(err, "failed to build query") + } + if _, err = tx.Exec(q, args...); err != nil { + return err + } + + groupPaths := []string{} + p := run.Group + for { + groupPaths = append(groupPaths, p) + prevp := p + p = path.Dir(p) + if p == prevp { + break + } + } + + for _, groupPath := range groupPaths { + // poor man insert or update that works because transaction isolation level is serializable + if _, err := tx.Exec("delete from rungroup where runID = $1 and grouppath = $2", run.ID, groupPath); err != nil { + return errors.Wrap(err, "failed to delete rungroup") + } + q, args, err := rungroupInsert.Values(run.ID, groupPath).ToSql() + if err != nil { + return errors.Wrap(err, "failed to build query") + } + if _, err = tx.Exec(q, args...); err != nil { + return err + } + } + + return nil +} + +func (r *ReadDB) insertRunLTS(tx *db.Tx, run *types.Run, data []byte) error { + // poor man insert or update that works because transaction isolation level is serializable + if _, err := tx.Exec("delete from run_lts where id = $1", run.ID); err != nil { + return errors.Wrap(err, "failed to delete run lts") + } + q, args, err := runLTSInsert.Values(run.ID, data, run.Phase).ToSql() + if err != nil { + return errors.Wrap(err, "failed to build query") + } + if _, err = tx.Exec(q, args...); err != nil { + return err + } + + groupPaths := []string{} + p := run.Group + for { + groupPaths = append(groupPaths, p) + prevp := p + p = path.Dir(p) + if p == prevp { + break + } + } + + for _, groupPath := range groupPaths { + // poor man insert or update that works because transaction isolation level is serializable + if _, err := tx.Exec("delete from rungroup_lts where runID = $1 and grouppath = $2", run.ID, groupPath); err != nil { + return errors.Wrap(err, "failed to delete rungroup") + } + q, args, err := 
rungroupLTSInsert.Values(run.ID, groupPath).ToSql()
+		if err != nil {
+			return errors.Wrap(err, "failed to build query")
+		}
+		if _, err = tx.Exec(q, args...); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func insertChangeGroupRevision(tx *db.Tx, changegroupID string, revision int64) error {
+	// poor man insert or update that works because transaction isolation level is serializable
+	if _, err := tx.Exec("delete from changegrouprevision where id = $1", changegroupID); err != nil {
+		return errors.Wrap(err, "failed to delete changegrouprevision")
+	}
+	q, args, err := changegrouprevisionInsert.Values(changegroupID, revision).ToSql()
+	if err != nil {
+		return errors.Wrap(err, "failed to build query")
+	}
+	if _, err = tx.Exec(q, args...); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (r *ReadDB) GetRevision() (int64, error) {
+	var revision int64
+
+	err := r.rdb.Do(func(tx *db.Tx) error {
+		var err error
+		revision, err = r.getRevision(tx)
+		return err
+	})
+	return revision, err
+}
+
+func (r *ReadDB) getRevision(tx *db.Tx) (int64, error) {
+	var revision int64
+
+	q, args, err := revisionSelect.ToSql()
+	r.log.Debugf("q: %s, args: %s", q, util.Dump(args))
+	if err != nil {
+		return 0, errors.Wrap(err, "failed to build query")
+	}
+
+	err = tx.QueryRow(q, args...).Scan(&revision)
+	if err == sql.ErrNoRows {
+		return 0, nil
+	}
+	return revision, err
+}
+
+func (r *ReadDB) GetChangeGroupsUpdateTokens(tx *db.Tx, groups []string) (*types.ChangeGroupsUpdateToken, error) {
+	s := changegrouprevisionSelect.Where(sq.Eq{"id": groups})
+	q, args, err := s.ToSql()
+	r.log.Debugf("q: %s, args: %s", q, util.Dump(args))
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to build query")
+	}
+	changeGroupsRevisions, err := fetchChangeGroupsRevision(tx, q, args...)
+ if err != nil { + return nil, err + } + + revision, err := r.getRevision(tx) + if err != nil { + return nil, err + } + + // for non existing changegroups use a changegroup with revision = 0 + for _, g := range groups { + if _, ok := changeGroupsRevisions[g]; !ok { + changeGroupsRevisions[g] = 0 + } + } + + return &types.ChangeGroupsUpdateToken{CurRevision: revision, ChangeGroupsRevisions: changeGroupsRevisions}, nil +} + +func (r *ReadDB) GetActiveRuns(tx *db.Tx, groups []string, phaseFilter []types.RunPhase, startRunID string, limit int, sortOrder types.SortOrder) ([]*types.Run, error) { + return r.getRunsFilteredActive(tx, groups, phaseFilter, startRunID, limit, sortOrder) +} + +func (r *ReadDB) PrefetchRuns(tx *db.Tx, groups []string, phaseFilter []types.RunPhase, startRunID string, limit int, sortOrder types.SortOrder) error { + useLTS := false + for _, phase := range phaseFilter { + if phase == types.RunPhaseFinished { + useLTS = true + } + } + if len(phaseFilter) == 0 { + useLTS = true + } + if !useLTS { + return nil + } + + for _, group := range groups { + err := r.SyncLTSRuns(tx, group, startRunID, limit, sortOrder) + if err != nil { + return errors.Wrap(err, "failed to sync runs from lts") + } + } + return nil +} + +func (r *ReadDB) GetRuns(tx *db.Tx, groups []string, phaseFilter []types.RunPhase, startRunID string, limit int, sortOrder types.SortOrder) ([]*types.Run, error) { + useLTS := false + for _, phase := range phaseFilter { + if phase == types.RunPhaseFinished { + useLTS = true + } + } + if len(phaseFilter) == 0 { + useLTS = true + } + + runs, err := r.getRunsFilteredActive(tx, groups, phaseFilter, startRunID, limit, sortOrder) + if err != nil { + return nil, err + } + if !useLTS { + return runs, err + } + + // skip if the phase requested is not finished + runsltsIDs, err := r.getRunsFilteredLTS(tx, groups, startRunID, limit, sortOrder) + if err != nil { + return nil, err + } + + runsMap := map[string]*types.Run{} + for _, r := range runs { + runsMap[r.ID] = r + } + for _, runID := range runsltsIDs { + if _, ok := runsMap[runID]; !ok { + runsMap[runID] = nil + } + } + + var keys []string + for k := range runsMap { + keys = append(keys, k) + } + switch sortOrder { + case types.SortOrderAsc: + sort.Sort(sort.StringSlice(keys)) + case types.SortOrderDesc: + sort.Sort(sort.Reverse(sort.StringSlice(keys))) + } + + aruns := make([]*types.Run, 0, len(runsMap)) + + count := 0 + for _, runID := range keys { + if count >= limit { + break + } + count++ + + run := runsMap[runID] + if run != nil { + aruns = append(aruns, run) + continue + } + + // get run from lts + run, err = store.LTSGetRun(r.wal, runID) + if err != nil { + return nil, errors.WithStack(err) + } + + aruns = append(aruns, run) + } + + return aruns, nil +} + +func (r *ReadDB) getRunsFilteredQuery(phaseFilter []types.RunPhase, groups []string, startRunID string, limit int, sortOrder types.SortOrder, lts bool) sq.SelectBuilder { + runt := "run" + runlabelt := "rungroup" + fields := []string{"data"} + if lts { + runt = "run_lts" + runlabelt = "rungroup_lts" + fields = []string{"id"} + } + + r.log.Debugf("runt: %s", runt) + s := sb.Select(fields...).From(runt + " as run") + switch sortOrder { + case types.SortOrderAsc: + s = s.OrderBy("run.id asc") + case types.SortOrderDesc: + s = s.OrderBy("run.id desc") + } + if len(phaseFilter) > 0 { + s = s.Where(sq.Eq{"phase": phaseFilter}) + } + if startRunID != "" { + switch sortOrder { + case types.SortOrderAsc: + s = s.Where(sq.Gt{"run.id": startRunID}) + case 
types.SortOrderDesc: + s = s.Where(sq.Lt{"run.id": startRunID}) + } + } + if limit > 0 { + s = s.Limit(uint64(limit)) + } + + if len(groups) > 0 { + s = s.Join(fmt.Sprintf("%s as rungroup on rungroup.runid = run.id", runlabelt)) + cond := sq.Or{} + for _, group := range groups { + cond = append(cond, sq.Eq{"rungroup.grouppath": group}) + } + s = s.Where(sq.Or{cond}) + } + + return s +} + +func (r *ReadDB) getRunsFilteredActive(tx *db.Tx, groups []string, phaseFilter []types.RunPhase, startRunID string, limit int, sortOrder types.SortOrder) ([]*types.Run, error) { + s := r.getRunsFilteredQuery(phaseFilter, groups, startRunID, limit, sortOrder, false) + + q, args, err := s.ToSql() + r.log.Debugf("q: %s, args: %s", q, util.Dump(args)) + if err != nil { + return nil, errors.Wrap(err, "failed to build query") + } + + return fetchRuns(tx, q, args...) +} + +func (r *ReadDB) getRunsFilteredLTS(tx *db.Tx, groups []string, startRunID string, limit int, sortOrder types.SortOrder) ([]string, error) { + s := r.getRunsFilteredQuery(nil, groups, startRunID, limit, sortOrder, true) + + q, args, err := s.ToSql() + r.log.Debugf("q: %s, args: %s", q, util.Dump(args)) + if err != nil { + return nil, errors.Wrap(err, "failed to build query") + } + + return fetchRunsLTS(tx, q, args...) +} + +func (r *ReadDB) GetRun(runID string) (*types.Run, error) { + var run *types.Run + + err := r.rdb.Do(func(tx *db.Tx) error { + var err error + run, err = r.getRun(tx, runID) + return err + }) + return run, err +} + +func (r *ReadDB) getRun(tx *db.Tx, runID string) (*types.Run, error) { + q, args, err := runSelect.Where(sq.Eq{"id": runID}).ToSql() + r.log.Debugf("q: %s, args: %s", q, util.Dump(args)) + if err != nil { + return nil, errors.Wrap(err, "failed to build query") + } + + runs, err := fetchRuns(tx, q, args...) + if err != nil { + return nil, errors.WithStack(err) + } + if len(runs) > 1 { + return nil, errors.Errorf("too many rows returned") + } + if len(runs) == 0 { + return nil, nil + } + return runs[0], nil +} + +func fetchRuns(tx *db.Tx, q string, args ...interface{}) ([]*types.Run, error) { + rows, err := tx.Query(q, args...) + if err != nil { + return nil, err + } + defer rows.Close() + return scanRuns(rows) +} + +func fetchRunsLTS(tx *db.Tx, q string, args ...interface{}) ([]string, error) { + rows, err := tx.Query(q, args...) 
+ if err != nil { + return nil, err + } + defer rows.Close() + return scanRunsLTS(rows) +} + +func scanRun(rows *sql.Rows) (*types.Run, error) { + var data []byte + if err := rows.Scan(&data); err != nil { + return nil, errors.Wrap(err, "failed to scan rows") + } + var run *types.Run + if err := json.Unmarshal(data, &run); err != nil { + return nil, errors.Wrap(err, "failed to unmarshal run") + } + + return run, nil +} + +func scanRunLTS(rows *sql.Rows) (string, error) { + var id string + if err := rows.Scan(&id); err != nil { + return "", errors.Wrap(err, "failed to scan rows") + } + return id, nil +} + +func scanRuns(rows *sql.Rows) ([]*types.Run, error) { + runs := []*types.Run{} + for rows.Next() { + r, err := scanRun(rows) + if err != nil { + return nil, err + } + runs = append(runs, r) + } + if err := rows.Err(); err != nil { + return nil, err + } + return runs, nil +} + +func scanRunsLTS(rows *sql.Rows) ([]string, error) { + ids := []string{} + for rows.Next() { + r, err := scanRunLTS(rows) + if err != nil { + return nil, err + } + ids = append(ids, r) + } + if err := rows.Err(); err != nil { + return nil, err + } + return ids, nil +} + +func fetchChangeGroupsRevision(tx *db.Tx, q string, args ...interface{}) (types.ChangeGroupsRevisions, error) { + rows, err := tx.Query(q, args...) + if err != nil { + return nil, err + } + defer rows.Close() + return scanChangeGroupsRevision(rows) +} + +func scanChangeGroupsRevision(rows *sql.Rows) (map[string]int64, error) { + changegroups := map[string]int64{} + for rows.Next() { + var ( + id string + revision int64 + ) + if err := rows.Scan(&id, &revision); err != nil { + return nil, errors.Wrap(err, "failed to scan rows") + } + changegroups[id] = revision + } + if err := rows.Err(); err != nil { + return nil, err + } + return changegroups, nil +} diff --git a/internal/services/runservice/scheduler/scheduler.go b/internal/services/runservice/scheduler/scheduler.go new file mode 100644 index 0000000..c2442e8 --- /dev/null +++ b/internal/services/runservice/scheduler/scheduler.go @@ -0,0 +1,1241 @@ +// Copyright 2019 Sorint.lab +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied +// See the License for the specific language governing permissions and +// limitations under the License. 
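+//
+// The scheduler is the run service control loop: it advances run tasks whose
+// parents are finished, generates executor tasks from the run config and
+// dispatches them to the chosen executor.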
+
+package scheduler
+
+import (
+	"bytes"
+	"context"
+	"crypto/tls"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"os"
+	"path/filepath"
+	"time"
+
+	scommon "github.com/sorintlab/agola/internal/common"
+	"github.com/sorintlab/agola/internal/etcd"
+	slog "github.com/sorintlab/agola/internal/log"
+	"github.com/sorintlab/agola/internal/objectstorage"
+	"github.com/sorintlab/agola/internal/runconfig"
+	"github.com/sorintlab/agola/internal/services/config"
+	"github.com/sorintlab/agola/internal/services/runservice/scheduler/api"
+	"github.com/sorintlab/agola/internal/services/runservice/scheduler/command"
+	"github.com/sorintlab/agola/internal/services/runservice/scheduler/common"
+	"github.com/sorintlab/agola/internal/services/runservice/scheduler/readdb"
+	"github.com/sorintlab/agola/internal/services/runservice/scheduler/store"
+	"github.com/sorintlab/agola/internal/services/runservice/types"
+	"github.com/sorintlab/agola/internal/util"
+	"github.com/sorintlab/agola/internal/wal"
+
+	ghandlers "github.com/gorilla/handlers"
+	"github.com/gorilla/mux"
+	"github.com/pkg/errors"
+	etcdclientv3 "go.etcd.io/etcd/clientv3"
+	"go.etcd.io/etcd/clientv3/concurrency"
+	"go.uber.org/zap"
+	"go.uber.org/zap/zapcore"
+)
+
+var level = zap.NewAtomicLevelAt(zapcore.InfoLevel)
+var logger = slog.New(level)
+var log = logger.Sugar()
+
+func mergeEnv(dest, src map[string]string) {
+	for k, v := range src {
+		dest[k] = v
+	}
+}
+
+func (s *Scheduler) runHasActiveTasks(ctx context.Context, runID string) (bool, error) {
+	// the real source of active tasks is the number of executor tasks in etcd:
+	// we can't rely on RunTask.Status since it's only updated when receiving
+	// updates from the executor, so it could be in a NotStarted state but have
+	// an executor task scheduled and running
+	ets, err := store.GetExecutorTasksForRun(ctx, s.e, runID)
+	if err != nil {
+		return false, err
+	}
+	activeTasks := false
+	for _, et := range ets {
+		if !et.Status.Phase.IsFinished() {
+			activeTasks = true
+		}
+	}
+
+	return activeTasks, nil
+}
+
+func (s *Scheduler) advanceRunTasks(ctx context.Context, r *types.Run) error {
+	log.Debugf("run: %s", util.Dump(r))
+	rc, err := store.LTSGetRunConfig(s.wal, r.ID)
+	if err != nil {
+		return errors.Wrapf(err, "cannot get run config %q", r.ID)
+	}
+	log.Debugf("rc: %s", util.Dump(rc))
+	rd, err := store.LTSGetRunData(s.wal, r.ID)
+	if err != nil {
+		return errors.Wrapf(err, "cannot get run data %q", r.ID)
+	}
+	log.Debugf("rd: %s", util.Dump(rd))
+
+	tasksToRun := []*types.RunTask{}
+	// get tasks that can be executed
+	for _, rt := range r.RunTasks {
+		log.Debugf("rt: %s", util.Dump(rt))
+		if rt.Status != types.RunTaskStatusNotStarted {
+			continue
+		}
+
+		rct := rc.Tasks[rt.ID]
+		parents := runconfig.GetParents(rc, rct)
+		canRun := true
+		for _, p := range parents {
+			rp := r.RunTasks[p.ID]
+			// a task can run only when all of its parents are finished and
+			// their workspace archives have been fetched
+			if !rp.Status.IsFinished() || !rp.ArchivesFetchFinished() {
+				canRun = false
+				break
+			}
+		}
+
+		if canRun {
+			if !rt.WaitingApproval && rct.NeedsApproval {
+				rt.WaitingApproval = true
+			} else {
+				tasksToRun = append(tasksToRun, rt)
+			}
+		}
+	}
+
+	// save run since we may have changed some run tasks to waiting approval
+	if _, err := store.AtomicPutRun(ctx, s.e, r, "", nil); err != nil {
+		return err
+	}
+
+	log.Debugf("tasksToRun: %s", util.Dump(tasksToRun))
+
+	for _, rt := range tasksToRun {
+		et, err := s.genExecutorTask(ctx, r, rt, rc, rd)
+		if err != nil {
+			return err
+		}
+		log.Debugf("et: %s", util.Dump(et))
+
+		// check that the executorTask wasn't already scheduled;
+		// just a check, but it's not really needed since the call to
+		// atomicPutExecutorTask will fail if it already exists
+		tet, err := store.GetExecutorTask(ctx, s.e, et.ID)
+		if err != nil && err != etcd.ErrKeyNotFound {
+			return err
+		}
+		if tet != nil {
+			continue
+		}
+		if _, err := store.AtomicPutExecutorTask(ctx, s.e, et); err != nil {
+			return err
+		}
+		// try to send executor task to executor, if this fails the executor will
+		// periodically fetch the executortask anyway
+		if err := s.sendExecutorTask(ctx, et); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// chooseExecutor chooses the executor to schedule the task on. Now it's a very simple/dumb selection
+// TODO(sgotti) improve this to use executor statistics, labels (arch type) etc...
+func (s *Scheduler) chooseExecutor(ctx context.Context) (*types.Executor, error) {
+	executors, err := store.GetExecutors(ctx, s.e)
+	if err != nil {
+		return nil, err
+	}
+	for _, e := range executors {
+		return e, nil
+	}
+	return nil, nil
+}
+
+func (s *Scheduler) genExecutorTask(ctx context.Context, r *types.Run, rt *types.RunTask, rc *types.RunConfig, rd *types.RunData) (*types.ExecutorTask, error) {
+	executor, err := s.chooseExecutor(ctx)
+	if err != nil {
+		return nil, err
+	}
+	if executor == nil {
+		return nil, errors.Errorf("cannot choose an executor")
+	}
+
+	rct := rc.Tasks[rt.ID]
+
+	environment := map[string]string{}
+	if rct.Environment != nil {
+		environment = rct.Environment
+	}
+	mergeEnv(environment, rc.Environment)
+	// run data environment variables override every other environment variable
+	mergeEnv(environment, rd.Environment)
+
+	et := &types.ExecutorTask{
+		// The executorTask ID must be the same as the runTask ID so we can detect if
+		// there's already an executorTask scheduled for that run task and we can get
+		// at most once task execution
+		ID:          rt.ID,
+		RunID:       r.ID,
+		TaskName:    rct.Name,
+		Containers:  rct.Runtime.Containers,
+		Environment: environment,
+		WorkingDir:  rct.WorkingDir,
+		Shell:       rct.Shell,
+		User:        rct.User,
+		Steps:       rct.Steps,
+		Status: types.ExecutorTaskStatus{
+			Phase:      types.ExecutorTaskPhaseNotStarted,
+			Steps:      make([]*types.ExecutorTaskStepStatus, len(rct.Steps)),
+			ExecutorID: executor.ID,
+		},
+	}
+
+	for i := range et.Status.Steps {
+		et.Status.Steps[i] = &types.ExecutorTaskStepStatus{
+			Phase: types.ExecutorTaskPhaseNotStarted,
+		}
+	}
+
+	// calculate workspace layers
+	ws := make(types.Workspace, rct.Level+1)
+	rctAllParents := runconfig.GetAllParents(rc, rct)
+	log.Debugf("rctAllParents: %s", util.Dump(rctAllParents))
+	for _, rctParent := range rctAllParents {
+		log.Debugf("rctParent: %s", util.Dump(rctParent))
+		log.Debugf("ws: %s", util.Dump(ws))
+		archives := []types.WorkspaceArchive{}
+		for _, archiveStep := range r.RunTasks[rctParent.ID].WorkspaceArchives {
+			archives = append(archives, types.WorkspaceArchive{TaskID: rctParent.ID, Step: archiveStep})
+		}
+		log.Debugf("archives: %v", util.Dump(archives))
+		if len(archives) > 0 {
+			ws[rctParent.Level] = append(ws[rctParent.Level], archives)
+		}
+	}
+	log.Debugf("ws: %s", util.Dump(ws))
+
+	et.Workspace = ws
+
+	return et, nil
+}
+
+func (s *Scheduler) sendExecutorTask(ctx context.Context, et *types.ExecutorTask) error {
+	executor, err := store.GetExecutor(ctx, s.e, et.Status.ExecutorID)
+	if err != nil && err != etcd.ErrKeyNotFound {
+		return err
+	}
+	if executor == nil {
+		log.Warnf("executor with id %q doesn't exist", et.Status.ExecutorID)
+		return nil
+	}
+
+	etj, err := json.Marshal(et)
+	if err != nil {
+		return err
+	}
+
+	r, err :=
http.Post(executor.ListenURL+"/api/v1alpha/executor", "", bytes.NewReader(etj))
+ if err != nil {
+ return err
+ }
+ defer r.Body.Close()
+ if r.StatusCode != http.StatusOK {
+ return errors.Errorf("received http status: %d", r.StatusCode)
+ }
+
+ return nil
+}
+
+func (s *Scheduler) compactChangeGroupsLoop(ctx context.Context) {
+ for {
+ if err := s.compactChangeGroups(ctx); err != nil {
+ log.Errorf("err: %+v", err)
+ }
+
+ select {
+ case <-ctx.Done():
+ return
+ default:
+ }
+
+ time.Sleep(1 * time.Second)
+ }
+}
+
+func (s *Scheduler) compactChangeGroups(ctx context.Context) error {
+ resp, err := s.e.Client().Get(ctx, common.EtcdChangeGroupMinRevisionKey)
+ if err != nil {
+ return err
+ }
+
+ revision := resp.Kvs[0].ModRevision
+
+ // first update minrevision
+ cmp := etcdclientv3.Compare(etcdclientv3.ModRevision(common.EtcdChangeGroupMinRevisionKey), "=", revision)
+ then := etcdclientv3.OpPut(common.EtcdChangeGroupMinRevisionKey, "")
+ txn := s.e.Client().Txn(ctx).If(cmp).Then(then)
+ tresp, err := txn.Commit()
+ if err != nil {
+ return etcd.FromEtcdError(err)
+ }
+ if !tresp.Succeeded {
+ return errors.Errorf("failed to update change group min revision key due to concurrent update")
+ }
+
+ revision = tresp.Header.Revision
+
+ // then remove all the old group keys with modrevision < minrevision
+ resp, err = s.e.List(ctx, common.EtcdChangeGroupsDir, "", 0)
+ if err != nil {
+ return err
+ }
+ for _, kv := range resp.Kvs {
+ if kv.ModRevision < revision-common.EtcdChangeGroupMinRevisionRange {
+ cmp := etcdclientv3.Compare(etcdclientv3.ModRevision(string(kv.Key)), "=", kv.ModRevision)
+ then := etcdclientv3.OpDelete(string(kv.Key))
+ txn := s.e.Client().Txn(ctx).If(cmp).Then(then)
+ tresp, err := txn.Commit()
+ if err != nil {
+ return etcd.FromEtcdError(err)
+ }
+ if !tresp.Succeeded {
+ log.Errorf("failed to delete change group key due to concurrent update")
+ }
+ }
+ }
+ return nil
+}
+
+func (s *Scheduler) advanceRun(ctx context.Context, runID string) error {
+ r, _, err := store.GetRun(ctx, s.e, runID)
+ if err != nil {
+ return errors.Wrapf(err, "cannot get run %q from etcd", runID)
+ }
+ log.Debugf("run: %s", util.Dump(r))
+
+ switch {
+ case !r.Result.IsSet() && r.Phase == types.RunPhaseRunning:
+ finished := true
+ for _, rt := range r.RunTasks {
+ if !rt.Status.IsFinished() {
+ finished = false
+ }
+ }
+ if finished {
+ r.Result = types.RunResultSuccess
+
+ if _, err := store.AtomicPutRun(ctx, s.e, r, common.RunEventTypeSuccess, nil); err != nil {
+ return err
+ }
+ return nil
+ }
+
+ if _, err := store.AtomicPutRun(ctx, s.e, r, "", nil); err != nil {
+ return err
+ }
+ if err := s.advanceRunTasks(ctx, r); err != nil {
+ return err
+ }
+
+ // if the run has a result defined then we can stop current tasks
+ case r.Result.IsSet():
+ if !r.Phase.IsFinished() {
+ hasRunningTasks, err := s.runHasActiveTasks(ctx, r.ID)
+ if err != nil {
+ return err
+ }
+ // if the run has a result defined AND there're no executor tasks scheduled we can mark
+ // the run phase as finished
+ if !hasRunningTasks {
+ r.ChangePhase(types.RunPhaseFinished)
+ }
+ if _, err := store.AtomicPutRun(ctx, s.e, r, "", nil); err != nil {
+ return err
+ }
+ }
+
+ // if the run is finished AND there're no executor tasks scheduled we can mark
+ // all not started runtasks' fetch phases (logs and archives) as finished
+ if r.Phase.IsFinished() {
+ for _, rt := range r.RunTasks {
+ log.Debugf("rt: %s", util.Dump(rt))
+ if rt.Status == types.RunTaskStatusNotStarted {
+ for _, s := range rt.Steps {
+ 
s.LogPhase = types.RunTaskFetchPhaseFinished + } + for i := range rt.WorkspaceArchivesPhase { + rt.WorkspaceArchivesPhase[i] = types.RunTaskFetchPhaseFinished + } + } + } + if _, err := store.AtomicPutRun(ctx, s.e, r, common.RunEventTypeRunning, nil); err != nil { + return err + } + } + } + + return nil +} + +func (s *Scheduler) updateRunStatus(ctx context.Context, et *types.ExecutorTask) error { + log.Debugf("et: %s", util.Dump(et)) + r, _, err := store.GetRun(ctx, s.e, et.RunID) + if err != nil { + return err + } + log.Debugf("run: %s", util.Dump(r)) + + rc, err := store.LTSGetRunConfig(s.wal, r.ID) + if err != nil { + return errors.Wrapf(err, "cannot get run config %q", r.ID) + } + log.Debugf("rc: %s", util.Dump(rc)) + + rt, ok := r.RunTasks[et.ID] + if !ok { + return errors.Errorf("no such run task with id %s for run %s", et.ID, r.ID) + } + rct, ok := rc.Tasks[rt.ID] + log.Debugf("rct: %s", util.Dump(rct)) + if !ok { + return errors.Errorf("no such run config task with id %s for run config %s", rt.ID, rc.ID) + } + + rt.StartTime = et.Status.StartTime + rt.EndTime = et.Status.EndTime + + wrongstatus := false + switch et.Status.Phase { + case types.ExecutorTaskPhaseNotStarted: + if rt.Status != types.RunTaskStatusNotStarted { + wrongstatus = true + } + case types.ExecutorTaskPhaseCancelled: + if rt.Status != types.RunTaskStatusCancelled && + rt.Status != types.RunTaskStatusNotStarted { + wrongstatus = true + } + case types.ExecutorTaskPhaseRunning: + if rt.Status != types.RunTaskStatusRunning && + rt.Status != types.RunTaskStatusNotStarted { + wrongstatus = true + } + case types.ExecutorTaskPhaseStopped: + if rt.Status != types.RunTaskStatusStopped && + rt.Status != types.RunTaskStatusRunning { + wrongstatus = true + } + case types.ExecutorTaskPhaseSuccess: + if rt.Status != types.RunTaskStatusSuccess && + rt.Status != types.RunTaskStatusRunning { + wrongstatus = true + } + case types.ExecutorTaskPhaseFailed: + if rt.Status != types.RunTaskStatusFailed && + rt.Status != types.RunTaskStatusNotStarted && + rt.Status != types.RunTaskStatusRunning { + wrongstatus = true + } + } + if wrongstatus { + log.Warnf("wrong executor task status: %s, rt status: %s", et.Status.Phase, rt.Status) + return nil + } + + switch et.Status.Phase { + case types.ExecutorTaskPhaseNotStarted: + rt.Status = types.RunTaskStatusNotStarted + case types.ExecutorTaskPhaseCancelled: + rt.Status = types.RunTaskStatusCancelled + case types.ExecutorTaskPhaseRunning: + rt.Status = types.RunTaskStatusRunning + case types.ExecutorTaskPhaseStopped: + rt.Status = types.RunTaskStatusStopped + case types.ExecutorTaskPhaseSuccess: + rt.Status = types.RunTaskStatusSuccess + case types.ExecutorTaskPhaseFailed: + rt.Status = types.RunTaskStatusFailed + } + + for i, s := range et.Status.Steps { + rt.Steps[i].Phase = s.Phase + rt.Steps[i].StartTime = s.StartTime + rt.Steps[i].EndTime = s.EndTime + } + + if rt.Status == types.RunTaskStatusFailed { + if !rct.IgnoreFailure { + s.failRun(r) + } + } + + var runEventType common.RunEventType + if r.Phase.IsFinished() { + switch r.Result { + case types.RunResultFailed: + runEventType = common.RunEventTypeFailed + } + } + + if _, err := store.AtomicPutRun(ctx, s.e, r, runEventType, nil); err != nil { + return err + } + + return s.advanceRun(ctx, r.ID) +} + +func (s *Scheduler) failRun(r *types.Run) { + r.Result = types.RunResultFailed +} + +func (s *Scheduler) runScheduler(ctx context.Context, c <-chan *types.ExecutorTask) { + for { + select { + case <-ctx.Done(): + return + case et := <-c: + go 
func() {
+ if err := s.updateRunStatus(ctx, et); err != nil {
+ // TODO(sgotti) improve logging to not log "run modified" errors since
+ // they are normal
+ log.Warnf("err: %+v", err)
+ }
+ }()
+ }
+ }
+}
+
+func (s *Scheduler) executorTasksCleanerLoop(ctx context.Context) {
+ for {
+ log.Debugf("executorTasksCleaner")
+
+ if err := s.executorTasksCleaner(ctx); err != nil {
+ log.Errorf("err: %+v", err)
+ }
+
+ select {
+ case <-ctx.Done():
+ return
+ default:
+ }
+
+ time.Sleep(1 * time.Second)
+ }
+}
+
+func (s *Scheduler) executorTasksCleaner(ctx context.Context) error {
+ resp, err := s.e.List(ctx, common.EtcdTasksDir, "", 0)
+ if err != nil {
+ return err
+ }
+
+ for _, kv := range resp.Kvs {
+ var et *types.ExecutorTask
+ if err := json.Unmarshal(kv.Value, &et); err != nil {
+ log.Errorf("err: %+v", err)
+ continue
+ }
+ et.Revision = kv.ModRevision
+ if err := s.executorTaskCleaner(ctx, et); err != nil {
+ log.Errorf("err: %+v", err)
+ }
+ }
+
+ return nil
+}
+
+func (s *Scheduler) executorTaskCleaner(ctx context.Context, et *types.ExecutorTask) error {
+ log.Debugf("et: %s", util.Dump(et))
+ if et.Status.Phase.IsFinished() {
+ r, _, err := store.GetRun(ctx, s.e, et.RunID)
+ if err != nil {
+ if err == etcd.ErrKeyNotFound {
+ // run doesn't exist, remove executor task
+ if err := store.DeleteExecutorTask(ctx, s.e, et.ID); err != nil {
+ log.Errorf("err: %+v", err)
+ return err
+ }
+ return nil
+ }
+ log.Errorf("err: %+v", err)
+ return err
+ }
+
+ if r.Phase.IsFinished() {
+ // if the run is finished mark the executor tasks to stop
+ if !et.Stop {
+ et.Stop = true
+ if _, err := store.AtomicPutExecutorTask(ctx, s.e, et); err != nil {
+ return err
+ }
+ // try to send executor task to executor, if this fails the executor will
+ // periodically fetch the executortask anyway
+ if err := s.sendExecutorTask(ctx, et); err != nil {
+ log.Errorf("err: %+v", err)
+ return err
+ }
+ }
+ }
+ }
+
+ if !et.Status.Phase.IsFinished() {
+ // if the executor doesn't exist anymore mark the not finished executor tasks as failed
+ executor, err := store.GetExecutor(ctx, s.e, et.Status.ExecutorID)
+ if err != nil && err != etcd.ErrKeyNotFound {
+ return err
+ }
+ if executor == nil {
+ log.Warnf("executor with id %q doesn't exist. 
marking executor task %q as failed", et.Status.ExecutorID, et.ID)
+ et.Status.Phase = types.ExecutorTaskPhaseFailed
+ if _, err := store.AtomicPutExecutorTask(ctx, s.e, et); err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
+
+func (s *Scheduler) runTasksUpdaterLoop(ctx context.Context) {
+ for {
+ log.Debugf("runTasksUpdater")
+
+ if err := s.runTasksUpdater(ctx); err != nil {
+ log.Errorf("err: %+v", err)
+ }
+
+ select {
+ case <-ctx.Done():
+ return
+ default:
+ }
+
+ time.Sleep(10 * time.Second)
+ }
+}
+
+func (s *Scheduler) runTasksUpdater(ctx context.Context) error {
+ log.Debugf("runTasksUpdater")
+
+ session, err := concurrency.NewSession(s.e.Client(), concurrency.WithTTL(5), concurrency.WithContext(ctx))
+ if err != nil {
+ return err
+ }
+ defer session.Close()
+
+ m := concurrency.NewMutex(session, "taskupdaterlock")
+
+ if err := m.Lock(ctx); err != nil {
+ return err
+ }
+ defer m.Unlock(ctx)
+
+ resp, err := s.e.List(ctx, common.EtcdTasksDir, "", 0)
+ if err != nil {
+ return err
+ }
+
+ for _, kv := range resp.Kvs {
+ var et *types.ExecutorTask
+ if err := json.Unmarshal(kv.Value, &et); err != nil {
+ return err
+ }
+ et.Revision = kv.ModRevision
+ if err := s.updateRunStatus(ctx, et); err != nil {
+ log.Errorf("err: %v", err)
+ }
+ }
+
+ return nil
+}
+
+func (s *Scheduler) fileExists(path string) (bool, error) {
+ _, err := os.Stat(path)
+ if err != nil && !os.IsNotExist(err) {
+ return false, err
+ }
+ return !os.IsNotExist(err), nil
+}
+
+func (s *Scheduler) fetchLog(ctx context.Context, rt *types.RunTask, stepnum int) error {
+ et, err := store.GetExecutorTask(ctx, s.e, rt.ID)
+ if err != nil && err != etcd.ErrKeyNotFound {
+ return err
+ }
+ if et == nil {
+ log.Errorf("executor task with id %q doesn't exist. This shouldn't happen. Skipping fetching", rt.ID)
+ return nil
+ }
+ executor, err := store.GetExecutor(ctx, s.e, et.Status.ExecutorID)
+ if err != nil && err != etcd.ErrKeyNotFound {
+ return err
+ }
+ if executor == nil {
+ log.Warnf("executor with id %q doesn't exist. 
Skipping fetching", et.Status.ExecutorID) + return nil + } + + path := store.LTSRunLogPath(rt.ID, stepnum) + ok, err := s.fileExists(path) + if err != nil { + return err + } + if ok { + return nil + } + + u := fmt.Sprintf(executor.ListenURL+"/api/v1alpha/executor/logs?taskid=%s&step=%d", rt.ID, stepnum) + log.Debugf("fetchLog: %s", u) + r, err := http.Get(u) + if err != nil { + return err + } + defer r.Body.Close() + + // ignore if not found + if r.StatusCode == http.StatusNotFound { + return nil + } + if r.StatusCode != http.StatusOK { + return errors.Errorf("received http status: %d", r.StatusCode) + } + + return s.lts.WriteObject(path, r.Body) +} + +func (s *Scheduler) finishLogPhase(ctx context.Context, runID, runTaskID string, stepnum int) error { + r, _, err := store.GetRun(ctx, s.e, runID) + if err != nil { + return err + } + rt, ok := r.RunTasks[runTaskID] + if !ok { + return errors.Errorf("no such task with ID %s in run %s", runTaskID, runID) + } + if len(rt.Steps) <= stepnum { + return errors.Errorf("no such step for task %s in run %s", runTaskID, runID) + } + + rt.Steps[stepnum].LogPhase = types.RunTaskFetchPhaseFinished + if _, err := store.AtomicPutRun(ctx, s.e, r, "", nil); err != nil { + return err + } + return nil +} + +func (s *Scheduler) finishArchivePhase(ctx context.Context, runID, runTaskID string, stepnum int) error { + r, _, err := store.GetRun(ctx, s.e, runID) + if err != nil { + return err + } + rt, ok := r.RunTasks[runTaskID] + if !ok { + return errors.Errorf("no such task with ID %s in run %s", runTaskID, runID) + } + if len(rt.Steps) <= stepnum { + return errors.Errorf("no such step for task %s in run %s", runTaskID, runID) + } + found := false + for i, sn := range rt.WorkspaceArchives { + if stepnum == sn { + found = true + rt.WorkspaceArchivesPhase[i] = types.RunTaskFetchPhaseFinished + break + } + } + if !found { + return errors.Errorf("no workspace archive for task %s, step %d in run %s", runTaskID, stepnum, runID) + } + + if _, err := store.AtomicPutRun(ctx, s.e, r, "", nil); err != nil { + return err + } + return nil +} + +func (s *Scheduler) fetchTaskLogs(ctx context.Context, runID string, rt *types.RunTask) { + log.Debugf("fetchTaskLogs") + for i, rts := range rt.Steps { + lp := rts.LogPhase + if lp == types.RunTaskFetchPhaseNotStarted { + if err := s.fetchLog(ctx, rt, i); err != nil { + log.Errorf("err: %+v", err) + continue + } + if err := s.finishLogPhase(ctx, runID, rt.ID, i); err != nil { + log.Errorf("err: %+v", err) + continue + } + } + } +} + +func (s *Scheduler) fetchArchive(ctx context.Context, rt *types.RunTask, stepnum int) error { + et, err := store.GetExecutorTask(ctx, s.e, rt.ID) + if err != nil && err != etcd.ErrKeyNotFound { + return err + } + if et == nil { + log.Errorf("executor task with id %q doesn't exist. This shouldn't happen. Skipping fetching", rt.ID) + return nil + } + executor, err := store.GetExecutor(ctx, s.e, et.Status.ExecutorID) + if err != nil && err != etcd.ErrKeyNotFound { + return err + } + if executor == nil { + log.Warnf("executor with id %q doesn't exist. 
Skipping fetching", et.Status.ExecutorID)
+ return nil
+ }
+
+ path := store.LTSRunArchivePath(rt.ID, stepnum)
+ ok, err := s.fileExists(path)
+ if err != nil {
+ return err
+ }
+ if ok {
+ return nil
+ }
+
+ u := fmt.Sprintf(executor.ListenURL+"/api/v1alpha/executor/archives?taskid=%s&step=%d", rt.ID, stepnum)
+ log.Debugf("fetchArchive: %s", u)
+ r, err := http.Get(u)
+ if err != nil {
+ return err
+ }
+ defer r.Body.Close()
+
+ // ignore if not found
+ if r.StatusCode == http.StatusNotFound {
+ return nil
+ }
+ if r.StatusCode != http.StatusOK {
+ return errors.Errorf("received http status: %d", r.StatusCode)
+ }
+
+ return s.lts.WriteObject(path, r.Body)
+}
+
+func (s *Scheduler) fetchTaskArchives(ctx context.Context, runID string, rt *types.RunTask) {
+ log.Debugf("fetchTaskArchives")
+ for i, stepnum := range rt.WorkspaceArchives {
+ phase := rt.WorkspaceArchivesPhase[i]
+ if phase == types.RunTaskFetchPhaseNotStarted {
+ if err := s.fetchArchive(ctx, rt, stepnum); err != nil {
+ log.Errorf("err: %+v", err)
+ continue
+ }
+ if err := s.finishArchivePhase(ctx, runID, rt.ID, stepnum); err != nil {
+ log.Errorf("err: %+v", err)
+ continue
+ }
+ }
+ }
+}
+
+func (s *Scheduler) fetcherLoop(ctx context.Context) {
+ for {
+ log.Debugf("fetcher")
+
+ if err := s.fetcher(ctx); err != nil {
+ log.Errorf("err: %+v", err)
+ }
+
+ select {
+ case <-ctx.Done():
+ return
+ default:
+ }
+
+ time.Sleep(2 * time.Second)
+ }
+}
+
+func (s *Scheduler) fetcher(ctx context.Context) error {
+ log.Debugf("fetcher")
+ runs, err := store.GetRuns(ctx, s.e)
+ if err != nil {
+ return err
+ }
+ for _, r := range runs {
+ log.Debugf("r: %s", util.Dump(r))
+ for _, rt := range r.RunTasks {
+ log.Debugf("rt: %s", util.Dump(rt))
+ if rt.Status.IsFinished() {
+ s.fetchTaskLogs(ctx, r.ID, rt)
+ s.fetchTaskArchives(ctx, r.ID, rt)
+ }
+ }
+
+ // We don't update the fetch phases and atomically put the run since fetching
+ // may take a lot of time and the run will already have been updated in the
+ // meantime, causing the atomic put to fail
+ // Another loop will check if the fetched file exists and update the run
+ }
+ return nil
+}
+
+func (s *Scheduler) runUpdaterLoop(ctx context.Context) {
+ for {
+ log.Debugf("runUpdater")
+
+ if err := s.runUpdater(ctx); err != nil {
+ log.Errorf("err: %+v", err)
+ }
+
+ select {
+ case <-ctx.Done():
+ return
+ default:
+ }
+
+ time.Sleep(2 * time.Second)
+ }
+}
+
+func (s *Scheduler) runUpdater(ctx context.Context) error {
+ log.Debugf("runUpdater")
+ runs, err := store.GetRuns(ctx, s.e)
+ if err != nil {
+ return err
+ }
+ for _, r := range runs {
+ if err := s.advanceRun(ctx, r.ID); err != nil {
+ log.Errorf("err: %+v", err)
+ continue
+ }
+ }
+
+ return nil
+}
+
+func (s *Scheduler) finishedRunsArchiverLoop(ctx context.Context) {
+ for {
+ log.Debugf("finished run archiver")
+
+ if err := s.finishedRunsArchiver(ctx); err != nil {
+ log.Errorf("err: %+v", err)
+ }
+
+ select {
+ case <-ctx.Done():
+ return
+ default:
+ }
+
+ time.Sleep(2 * time.Second)
+ }
+}
+
+func (s *Scheduler) finishedRunsArchiver(ctx context.Context) error {
+ log.Debugf("finished run archiver")
+ runs, err := store.GetRuns(ctx, s.e)
+ if err != nil {
+ return err
+ }
+ for _, r := range runs {
+ if err := s.finishedRunArchiver(ctx, r); err != nil {
+ log.Errorf("err: %+v", err)
+ }
+ }
+
+ // We write archived runs to the lts in the order they were archived
+ runs, err = store.GetRuns(ctx, s.e)
+ if err != nil {
+ return err
+ }
+ for _, r := range runs {
+ if r.Archived {
+ if err := s.runLTSArchiver(ctx, r); err 
!= nil {
+ log.Errorf("err: %+v", err)
+ }
+ }
+ }
+
+ return nil
+}
+
+// finishedRunArchiver archives a run if it's finished and all the fetching
+// phases (logs and archives) are marked as finished
+func (s *Scheduler) finishedRunArchiver(ctx context.Context, r *types.Run) error {
+ //log.Debugf("r: %s", util.Dump(r))
+ if !r.Phase.IsFinished() {
+ return nil
+ }
+
+ done := true
+ for _, rt := range r.RunTasks {
+ // check all logs are fetched
+ for _, rts := range rt.Steps {
+ lp := rts.LogPhase
+ if lp != types.RunTaskFetchPhaseFinished {
+ done = false
+ break
+ }
+ }
+
+ // check all archives are fetched
+ for _, lp := range rt.WorkspaceArchivesPhase {
+ if lp != types.RunTaskFetchPhaseFinished {
+ done = false
+ break
+ }
+ }
+ }
+ if !done {
+ return nil
+ }
+ log.Infof("run %q archiving completed", r.ID)
+
+ // if the fetching is finished we can remove the executor tasks. We cannot
+ // remove them before since they contain the reference to the executor where
+ // we should fetch the data from
+
+ for _, rt := range r.RunTasks {
+ log.Infof("deleting executor task %s", rt.ID)
+ if err := store.DeleteExecutorTask(ctx, s.e, rt.ID); err != nil {
+ return err
+ }
+ }
+
+ r.Archived = true
+ if _, err := store.AtomicPutRun(ctx, s.e, r, "", nil); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (s *Scheduler) runLTSArchiver(ctx context.Context, r *types.Run) error {
+ // TODO(sgotti) avoid saving the run on the lts multiple times if the
+ // store.DeleteRun below fails
+ log.Infof("saving run in lts: %s", r.ID)
+ ra, err := store.LTSSaveRunAction(r)
+ if err != nil {
+ return err
+ }
+ if _, err = s.wal.WriteWal(ctx, []*wal.Action{ra}, nil); err != nil {
+ return err
+ }
+
+ log.Infof("deleting run from etcd: %s", r.ID)
+ if err := store.DeleteRun(ctx, s.e, r.ID); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (s *Scheduler) additionalActions(action *wal.Action) ([]*wal.Action, error) {
+ configType, _ := common.PathToTypeID(action.Path)
+
+ var actionType wal.ActionType
+
+ switch action.ActionType {
+ case wal.ActionTypePut:
+ actionType = wal.ActionTypePut
+ case wal.ActionTypeDelete:
+ actionType = wal.ActionTypeDelete
+ }
+
+ switch configType {
+ case common.ConfigTypeRun:
+ var run *types.Run
+ if err := json.Unmarshal(action.Data, &run); err != nil {
+ return nil, errors.Wrap(err, "failed to unmarshal run")
+ }
+ indexes := store.LTSGenIndexes(s.lts, run)
+ actions := make([]*wal.Action, len(indexes))
+ for i, index := range indexes {
+ actions[i] = &wal.Action{
+ ActionType: actionType,
+ Path: index,
+ Data: []byte{},
+ }
+ }
+ return actions, nil
+ }
+
+ return []*wal.Action{}, nil
+}
+
+type Scheduler struct {
+ c *config.RunServiceScheduler
+ e *etcd.Store
+ lts *objectstorage.ObjStorage
+ wal *wal.WalManager
+ readDB *readdb.ReadDB
+ ch *command.CommandHandler
+}
+
+func NewScheduler(ctx context.Context, c *config.RunServiceScheduler) (*Scheduler, error) {
+ if c.Debug {
+ level.SetLevel(zapcore.DebugLevel)
+ }
+
+ lts, err := scommon.NewLTS(&c.LTS)
+ if err != nil {
+ return nil, err
+ }
+ e, err := scommon.NewEtcd(&c.Etcd, logger, "runscheduler")
+ if err != nil {
+ return nil, err
+ }
+
+ // Create changegroup min revision if it doesn't exist
+ cmp := []etcdclientv3.Cmp{}
+ then := []etcdclientv3.Op{}
+
+ cmp = append(cmp, etcdclientv3.Compare(etcdclientv3.CreateRevision(common.EtcdChangeGroupMinRevisionKey), "=", 0))
+ then = append(then, etcdclientv3.OpPut(common.EtcdChangeGroupMinRevisionKey, ""))
+ txn := e.Client().Txn(ctx).If(cmp...).Then(then...) 
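+ // the compare on CreateRevision == 0 makes the put happen only when the key
+ // doesn't exist yet; if another scheduler instance already created it the
+ // transaction is simply a no-op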
+ if _, err := txn.Commit(); err != nil {
+ return nil, etcd.FromEtcdError(err)
+ }
+
+ s := &Scheduler{
+ c: c,
+ e: e,
+ lts: lts,
+ }
+
+ walConf := &wal.WalManagerConfig{
+ E: e,
+ Lts: lts,
+ AdditionalActionsFunc: s.additionalActions,
+ }
+ wal, err := wal.NewWalManager(ctx, logger, walConf)
+ if err != nil {
+ return nil, err
+ }
+ s.wal = wal
+
+ readDB, err := readdb.NewReadDB(ctx, logger, filepath.Join(c.DataDir, "readdb"), e, wal)
+ if err != nil {
+ return nil, err
+ }
+ s.readDB = readDB
+
+ ch := command.NewCommandHandler(logger, e, lts, wal)
+ s.ch = ch
+
+ return s, nil
+}
+
+func (s *Scheduler) Run(ctx context.Context) error {
+ errCh := make(chan error)
+
+ go func() { errCh <- s.wal.Run(ctx) }()
+ go s.readDB.Run(ctx)
+
+ ch := make(chan *types.ExecutorTask)
+
+ // noop CORS handler
+ corsHandler := func(h http.Handler) http.Handler {
+ return h
+ }
+
+ corsAllowedMethodsOptions := ghandlers.AllowedMethods([]string{"GET", "HEAD", "POST", "PUT", "DELETE"})
+ corsAllowedHeadersOptions := ghandlers.AllowedHeaders([]string{"Accept", "Accept-Encoding", "Authorization", "Content-Length", "Content-Type", "X-CSRF-Token"})
+ corsAllowedOriginsOptions := ghandlers.AllowedOrigins([]string{"*"})
+ corsHandler = ghandlers.CORS(corsAllowedMethodsOptions, corsAllowedHeadersOptions, corsAllowedOriginsOptions)
+
+ // executor dedicated api, only calls from executor should happen on these handlers
+ executorStatusHandler := api.NewExecutorStatusHandler(s.e, ch)
+ executorTaskStatusHandler := api.NewExecutorTaskStatusHandler(s.e, ch)
+ executorTaskHandler := api.NewExecutorTaskHandler(s.e)
+ executorTasksHandler := api.NewExecutorTasksHandler(s.e)
+ archivesHandler := api.NewArchivesHandler(logger, s.lts)
+
+ // api from clients
+ executorDeleteHandler := api.NewExecutorDeleteHandler(logger, s.ch)
+
+ logsHandler := api.NewLogsHandler(logger, s.e, s.lts, s.wal)
+
+ runHandler := api.NewRunHandler(logger, s.e, s.wal, s.readDB)
+ runTaskActionsHandler := api.NewRunTaskActionsHandler(logger, s.ch)
+ runsHandler := api.NewRunsHandler(logger, s.readDB)
+ runActionsHandler := api.NewRunActionsHandler(logger, s.ch)
+ runCreateHandler := api.NewRunCreateHandler(logger, s.ch)
+ changeGroupsUpdateTokensHandler := api.NewChangeGroupsUpdateTokensHandler(logger, s.readDB)
+
+ router := mux.NewRouter()
+ apirouter := router.PathPrefix("/api/v1alpha").Subrouter()
+ apirouter.NotFoundHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusBadRequest) })
+
+ apirouter.Handle("/executor/{executorid}", executorStatusHandler).Methods("POST")
+ apirouter.Handle("/executor/{executorid}", executorDeleteHandler).Methods("DELETE")
+ apirouter.Handle("/executor/{executorid}/tasks", executorTasksHandler).Methods("GET")
+ apirouter.Handle("/executor/{executorid}/tasks/{taskid}", executorTaskHandler).Methods("GET")
+ apirouter.Handle("/executor/{executorid}/tasks/{taskid}", executorTaskStatusHandler).Methods("POST")
+ apirouter.Handle("/executor/archives", archivesHandler).Methods("GET")
+
+ apirouter.Handle("/logs", logsHandler).Methods("GET")
+
+ apirouter.Handle("/runs/{runid}", runHandler).Methods("GET")
+ apirouter.Handle("/runs/{runid}/actions", runActionsHandler).Methods("POST")
+ apirouter.Handle("/runs/{runid}/tasks/{taskid}/actions", runTaskActionsHandler).Methods("POST")
+ apirouter.Handle("/runs", runsHandler).Methods("GET")
+ apirouter.Handle("/runs", runCreateHandler).Methods("PUT")
+
+ apirouter.Handle("/changegroups", 
changeGroupsUpdateTokensHandler).Methods("GET")
+
+ mainrouter := mux.NewRouter()
+ mainrouter.PathPrefix("/").Handler(corsHandler(router))
+
+ // Return a bad request when it doesn't match any route
+ mainrouter.NotFoundHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusBadRequest) })
+
+ go s.executorTasksCleanerLoop(ctx)
+ go s.runUpdaterLoop(ctx)
+ go s.runTasksUpdaterLoop(ctx)
+ go s.fetcherLoop(ctx)
+ go s.finishedRunsArchiverLoop(ctx)
+ go s.compactChangeGroupsLoop(ctx)
+
+ go s.runScheduler(ctx, ch)
+
+ var tlsConfig *tls.Config
+ if s.c.Web.TLS {
+ var err error
+ tlsConfig, err = util.NewTLSConfig(s.c.Web.TLSCertFile, s.c.Web.TLSKeyFile, "", false)
+ if err != nil {
+ log.Errorf("err: %+v", err)
+ return err
+ }
+ }
+
+ httpServer := http.Server{
+ Addr: s.c.Web.ListenAddress,
+ Handler: mainrouter,
+ TLSConfig: tlsConfig,
+ }
+
+ lerrCh := make(chan error)
+ go func() {
+ lerrCh <- httpServer.ListenAndServe()
+ }()
+
+ select {
+ case <-ctx.Done():
+ log.Infof("runservice scheduler exiting")
+ httpServer.Close()
+ return nil
+ case err := <-lerrCh:
+ log.Errorf("http server listen error: %v", err)
+ return err
+ case err := <-errCh:
+ log.Errorf("error: %+v", err)
+ return err
+ }
+}
diff --git a/internal/services/runservice/scheduler/store/store.go b/internal/services/runservice/scheduler/store/store.go
new file mode 100644
index 0000000..2e91c71
--- /dev/null
+++ b/internal/services/runservice/scheduler/store/store.go
@@ -0,0 +1,551 @@
+// Copyright 2019 Sorint.lab
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package store
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "path"
+ "reflect"
+
+ "github.com/sorintlab/agola/internal/etcd"
+ "github.com/sorintlab/agola/internal/objectstorage"
+ "github.com/sorintlab/agola/internal/sequence"
+ "github.com/sorintlab/agola/internal/services/runservice/scheduler/common"
+ "github.com/sorintlab/agola/internal/services/runservice/types"
+ "github.com/sorintlab/agola/internal/util"
+ "github.com/sorintlab/agola/internal/wal"
+
+ "github.com/pkg/errors"
+ etcdclientv3 "go.etcd.io/etcd/clientv3"
+)
+
+func LTSSubGroups(group string) []string {
+ return util.PathHierarchy(group)
+}
+
+func LTSIndexGroupDir(group string) string {
+ groupPath := util.EncodeSha256Hex(group)
+ if group == "." 
|| group == "/" { + groupPath = "all" + } + return path.Join(common.StorageRunsIndexesDir, groupPath) +} + +func LTSIndexRunIDOrderDir(group string, sortOrder types.SortOrder) string { + var dir string + switch sortOrder { + case types.SortOrderAsc: + dir = "runid/asc" + case types.SortOrderDesc: + dir = "runid/desc" + } + return path.Join(LTSIndexGroupDir(group), dir) +} + +func LTSIndexRunIDOrderPath(group, runID string, sortOrder types.SortOrder) string { + s, err := sequence.Parse(runID) + if err != nil { + panic(err) + } + + order := runID + if sortOrder == types.SortOrderDesc { + order = s.Reverse().String() + } + return path.Join(LTSIndexRunIDOrderDir(group, sortOrder), order, runID) +} + +func LTSIndexRunIDOrderPaths(group, runID string, sortOrder types.SortOrder) []string { + paths := []string{} + subGroups := LTSSubGroups(group) + for _, subGroup := range subGroups { + paths = append(paths, LTSIndexRunIDOrderPath(subGroup, runID, sortOrder)) + } + return paths +} + +func LTSRunCounterPaths(group, runID string, sortOrder types.SortOrder) []string { + paths := []string{} + subGroups := LTSSubGroups(group) + for _, subGroup := range subGroups { + paths = append(paths, common.StorageCounterFile(subGroup)) + } + return paths +} + +func LTSGetRunCounter(wal *wal.WalManager, group string) (uint64, *wal.ChangeGroupsUpdateToken, error) { + // use the first group dir after the root + ph := util.PathHierarchy(group) + if len(ph) < 2 { + return 0, nil, errors.Errorf("cannot determine group counter name, wrong group path %q", group) + } + runCounterPath := common.StorageCounterFile(ph[1]) + rcf, cgt, err := wal.ReadObject(runCounterPath, []string{"counter-" + ph[1]}) + if err != nil { + return 0, cgt, err + } + defer rcf.Close() + d := json.NewDecoder(rcf) + var c uint64 + if err := d.Decode(&c); err != nil { + return 0, nil, err + } + + return c, cgt, nil +} + +func LTSUpdateRunCounterAction(ctx context.Context, c uint64, group string) (*wal.Action, error) { + // use the first group dir after the root + ph := util.PathHierarchy(group) + if len(ph) < 2 { + return nil, errors.Errorf("cannot determine group counter name, wrong group path %q", group) + } + + cj, err := json.Marshal(c) + if err != nil { + return nil, err + } + + action := &wal.Action{ + ActionType: wal.ActionTypePut, + Path: common.StorageCounterFile(ph[1]), + Data: cj, + } + + return action, nil +} + +func LTSRunLogPath(rtID string, step int) string { + return path.Join("logs", fmt.Sprintf("%s/%d.log", rtID, step)) +} + +func LTSRunArchivePath(rtID string, step int) string { + return path.Join("workspacearchives", fmt.Sprintf("%s/%d.tar", rtID, step)) +} + +func LTSGetRunConfig(wal *wal.WalManager, runConfigID string) (*types.RunConfig, error) { + runConfigPath := common.StorageRunConfigFile(runConfigID) + rcf, _, err := wal.ReadObject(runConfigPath, nil) + if err != nil { + return nil, err + } + defer rcf.Close() + d := json.NewDecoder(rcf) + var rc *types.RunConfig + if err := d.Decode(&rc); err != nil { + return nil, err + } + + return rc, nil +} + +func LTSSaveRunConfigAction(rc *types.RunConfig) (*wal.Action, error) { + rcj, err := json.Marshal(rc) + if err != nil { + return nil, err + } + + action := &wal.Action{ + ActionType: wal.ActionTypePut, + Path: common.StorageRunConfigFile(rc.ID), + Data: rcj, + } + + return action, nil +} + +func LTSGetRunData(wal *wal.WalManager, runDataID string) (*types.RunData, error) { + runDataPath := common.StorageRunDataFile(runDataID) + rdf, _, err := wal.ReadObject(runDataPath, nil) + if 
err != nil { + return nil, err + } + defer rdf.Close() + d := json.NewDecoder(rdf) + var rd *types.RunData + if err := d.Decode(&rd); err != nil { + return nil, err + } + + return rd, nil +} + +func LTSSaveRunDataAction(rd *types.RunData) (*wal.Action, error) { + rdj, err := json.Marshal(rd) + if err != nil { + return nil, err + } + + action := &wal.Action{ + ActionType: wal.ActionTypePut, + Path: common.StorageRunDataFile(rd.ID), + Data: rdj, + } + + return action, nil +} + +func LTSGetRun(wal *wal.WalManager, runID string) (*types.Run, error) { + runPath := common.StorageRunFile(runID) + rf, _, err := wal.ReadObject(runPath, nil) + + if err != nil { + return nil, err + } + defer rf.Close() + d := json.NewDecoder(rf) + var r *types.Run + if err := d.Decode(&r); err != nil { + return nil, err + } + + return r, nil +} + +func LTSSaveRunAction(r *types.Run) (*wal.Action, error) { + rj, err := json.Marshal(r) + if err != nil { + return nil, err + } + + action := &wal.Action{ + ActionType: wal.ActionTypePut, + Path: common.StorageRunFile(r.ID), + Data: rj, + } + + return action, nil +} + +func LTSGenIndexes(lts *objectstorage.ObjStorage, r *types.Run) []string { + indexes := []string{} + for _, order := range []types.SortOrder{types.SortOrderAsc, types.SortOrderDesc} { + indexes = append(indexes, LTSIndexRunIDOrderPaths(r.Group, r.ID, order)...) + //indexes = append(indexes, LTSIndexRunArchiveOrderPaths(r.Group, r.LTSSequence, r.ID, order)...) + } + return indexes +} + +func GetExecutor(ctx context.Context, e *etcd.Store, executorID string) (*types.Executor, error) { + resp, err := e.Get(ctx, common.EtcdExecutorKey(executorID)) + if err != nil { + return nil, err + } + + var executor *types.Executor + kv := resp.Kvs[0] + if err := json.Unmarshal(kv.Value, &executor); err != nil { + return nil, err + } + executor.Revision = kv.ModRevision + + return executor, nil +} + +func GetExecutors(ctx context.Context, e *etcd.Store) ([]*types.Executor, error) { + resp, err := e.List(ctx, common.EtcdExecutorsDir, "", 0) + if err != nil { + return nil, err + } + + executors := []*types.Executor{} + + for _, kv := range resp.Kvs { + var executor *types.Executor + if err := json.Unmarshal(kv.Value, &executor); err != nil { + return nil, err + } + executor.Revision = kv.ModRevision + executors = append(executors, executor) + } + + return executors, nil +} + +func PutExecutor(ctx context.Context, e *etcd.Store, executor *types.Executor) (*types.Executor, error) { + executorj, err := json.Marshal(executor) + if err != nil { + return nil, err + } + + resp, err := e.Put(ctx, common.EtcdExecutorKey(executor.ID), executorj, nil) + if err != nil { + return nil, err + } + executor.Revision = resp.Header.Revision + + return executor, nil +} + +func DeleteExecutor(ctx context.Context, e *etcd.Store, executorID string) error { + return e.Delete(ctx, common.EtcdExecutorKey(executorID)) +} + +func GetExecutorTask(ctx context.Context, e *etcd.Store, etID string) (*types.ExecutorTask, error) { + resp, err := e.Get(ctx, common.EtcdTaskKey(etID)) + if err != nil { + return nil, err + } + + var et *types.ExecutorTask + kv := resp.Kvs[0] + if err := json.Unmarshal(kv.Value, &et); err != nil { + return nil, err + } + et.Revision = kv.ModRevision + + return et, nil +} + +func AtomicPutExecutorTask(ctx context.Context, e *etcd.Store, et *types.ExecutorTask) (*types.ExecutorTask, error) { + etj, err := json.Marshal(et) + if err != nil { + return nil, err + } + + resp, err := e.AtomicPut(ctx, common.EtcdTaskKey(et.ID), etj, 
et.Revision, nil)
+ if err != nil {
+ return nil, err
+ }
+ et.Revision = resp.Header.Revision
+
+ return et, nil
+}
+
+func UpdateExecutorTaskStatus(ctx context.Context, e *etcd.Store, et *types.ExecutorTask) (*types.ExecutorTask, error) {
+ curEt, err := GetExecutorTask(ctx, e, et.ID)
+ if err != nil {
+ return nil, err
+ }
+
+ //if curET.Revision >= et.Revision {
+ // return nil, errors.Errorf("concurrency exception")
+ //}
+
+ curEt.Status = et.Status
+ return AtomicPutExecutorTask(ctx, e, curEt)
+}
+
+func DeleteExecutorTask(ctx context.Context, e *etcd.Store, etID string) error {
+ return e.Delete(ctx, common.EtcdTaskKey(etID))
+}
+
+func GetExecutorTasks(ctx context.Context, e *etcd.Store, executorID string) ([]*types.ExecutorTask, error) {
+ resp, err := e.List(ctx, common.EtcdTasksDir, "", 0)
+ if err != nil {
+ return nil, err
+ }
+
+ ets := []*types.ExecutorTask{}
+
+ for _, kv := range resp.Kvs {
+ var et *types.ExecutorTask
+ if err := json.Unmarshal(kv.Value, &et); err != nil {
+ return nil, err
+ }
+ et.Revision = kv.ModRevision
+ if et.Status.ExecutorID == executorID {
+ ets = append(ets, et)
+ }
+ }
+
+ return ets, nil
+}
+
+func GetExecutorTasksForRun(ctx context.Context, e *etcd.Store, runID string) ([]*types.ExecutorTask, error) {
+ r, curRevision, err := GetRun(ctx, e, runID)
+ if err != nil {
+ return nil, err
+ }
+
+ rtIDs := make([]string, 0, len(r.RunTasks))
+ for rtID := range r.RunTasks {
+ rtIDs = append(rtIDs, rtID)
+ }
+
+ ets := []*types.ExecutorTask{}
+
+ // batch fetch in groups of 10 tasks at the same revision
+ i := 0
+ for i < len(rtIDs) {
+ then := []etcdclientv3.Op{}
+ c := 0
+ for c < 10 && i < len(rtIDs) {
+ then = append(then, etcdclientv3.OpGet(common.EtcdTaskKey(rtIDs[i]), etcdclientv3.WithRev(curRevision)))
+ c++
+ i++
+ }
+
+ txn := e.Client().Txn(ctx).Then(then...) 
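+ // a txn with no compares always succeeds, so committing it just executes the
+ // batched gets atomically; WithRev pins every read to the revision the run
+ // was read at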
+ tresp, err := txn.Commit()
+ if err != nil {
+ return nil, etcd.FromEtcdError(err)
+ }
+ for _, resp := range tresp.Responses {
+ if len(resp.GetResponseRange().Kvs) == 0 {
+ continue
+ }
+ kv := resp.GetResponseRange().Kvs[0]
+ var et *types.ExecutorTask
+ if err := json.Unmarshal(kv.Value, &et); err != nil {
+ return nil, err
+ }
+ et.Revision = kv.ModRevision
+ ets = append(ets, et)
+ }
+ }
+
+ return ets, nil
+}
+
+func GetRun(ctx context.Context, e *etcd.Store, runID string) (*types.Run, int64, error) {
+ resp, err := e.Get(ctx, common.EtcdRunKey(runID))
+ if err != nil {
+ return nil, 0, err
+ }
+
+ var r *types.Run
+ kv := resp.Kvs[0]
+ if err := json.Unmarshal(kv.Value, &r); err != nil {
+ return nil, 0, err
+ }
+ r.Revision = kv.ModRevision
+
+ return r, resp.Header.Revision, nil
+}
+
+func AtomicPutRun(ctx context.Context, e *etcd.Store, r *types.Run, runEventType common.RunEventType, cgt *types.ChangeGroupsUpdateToken) (*types.Run, error) {
+ // insert only if the run has changed
+ curRun, _, err := GetRun(ctx, e, r.ID)
+ if err != nil && err != etcd.ErrKeyNotFound {
+ return nil, err
+ }
+ if err != etcd.ErrKeyNotFound {
+ if curRun.Revision != r.Revision {
+ // fast fail path if the run was already updated
+ return nil, errors.Errorf("run modified")
+ }
+ if reflect.DeepEqual(curRun, r) {
+ return curRun, nil
+ }
+ }
+
+ rj, err := json.Marshal(r)
+ if err != nil {
+ return nil, err
+ }
+
+ hasOptimisticLocking := false
+
+ cmp := []etcdclientv3.Cmp{}
+ then := []etcdclientv3.Op{}
+
+ key := common.EtcdRunKey(r.ID)
+ if r.Revision > 0 {
+ cmp = append(cmp, etcdclientv3.Compare(etcdclientv3.ModRevision(key), "=", r.Revision))
+ } else {
+ cmp = append(cmp, etcdclientv3.Compare(etcdclientv3.CreateRevision(key), "=", 0))
+ }
+ then = append(then, etcdclientv3.OpPut(key, string(rj)))
+
+ if cgt != nil {
+ for cgName, cgRev := range cgt.ChangeGroupsRevisions {
+ hasOptimisticLocking = true
+
+ groupKey := path.Join(common.EtcdChangeGroupsDir, cgName)
+ if cgRev > 0 {
+ cmp = append(cmp, etcdclientv3.Compare(etcdclientv3.ModRevision(groupKey), "=", cgRev))
+ } else {
+ cmp = append(cmp, etcdclientv3.Compare(etcdclientv3.CreateRevision(groupKey), "=", 0))
+ }
+ then = append(then, etcdclientv3.OpPut(groupKey, ""))
+ }
+
+ if cgt.CurRevision > 0 {
+ hasOptimisticLocking = true
+ cmp = append(cmp, etcdclientv3.Compare(etcdclientv3.ModRevision(common.EtcdChangeGroupMinRevisionKey), "<", cgt.CurRevision+common.EtcdChangeGroupMinRevisionRange))
+ }
+ }
+
+ if runEventType != "" {
+ runEvent, err := common.NewRunEvent(ctx, e, runEventType, r.ID)
+ if err != nil {
+ return nil, err
+ }
+ eventj, err := json.Marshal(runEvent)
+ if err != nil {
+ return nil, err
+ }
+ then = append(then, etcdclientv3.OpPut(common.EtcdRunEventKey, string(eventj)))
+ }
+
+ txn := e.Client().Txn(ctx).If(cmp...).Then(then...) 
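+ // the txn succeeds only if every compare holds: the run revision is unchanged
+ // and, when a change groups update token is given, none of the change groups
+ // were updated concurrently (optimistic locking)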
+ tresp, err := txn.Commit()
+ if err != nil {
+ return nil, etcd.FromEtcdError(err)
+ }
+ if !tresp.Succeeded {
+ if hasOptimisticLocking {
+ return nil, errors.Errorf("optimistic locking failed")
+ }
+ return nil, errors.Errorf("run modified")
+ }
+
+ r.Revision = tresp.Responses[0].GetResponsePut().Header.Revision
+
+ return r, nil
+}
+
+func DeleteRun(ctx context.Context, e *etcd.Store, runID string) error {
+ return e.Delete(ctx, common.EtcdRunKey(runID))
+}
+
+func GetRuns(ctx context.Context, e *etcd.Store) ([]*types.Run, error) {
+ resp, err := e.List(ctx, common.EtcdRunsDir, "", 0)
+ if err != nil {
+ return nil, err
+ }
+
+ runs := []*types.Run{}
+
+ for _, kv := range resp.Kvs {
+ var r *types.Run
+ if err := json.Unmarshal(kv.Value, &r); err != nil {
+ return nil, err
+ }
+ r.Revision = kv.ModRevision
+ runs = append(runs, r)
+ }
+
+ return runs, nil
+}
+
+func GetRunEtcdOrLTS(ctx context.Context, e *etcd.Store, wal *wal.WalManager, runID string) (*types.Run, error) {
+ r, _, err := GetRun(ctx, e, runID)
+ if err != nil && err != etcd.ErrKeyNotFound {
+ return nil, err
+ }
+ if r == nil {
+ r, err = LTSGetRun(wal, runID)
+ if err != nil && err != objectstorage.ErrNotExist {
+ return nil, err
+ }
+ }
+
+ return r, nil
+}
diff --git a/internal/services/runservice/types/types.go b/internal/services/runservice/types/types.go
new file mode 100644
index 0000000..b348027
--- /dev/null
+++ b/internal/services/runservice/types/types.go
@@ -0,0 +1,515 @@
+// Copyright 2019 Sorint.lab
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package types
+
+import (
+ "encoding/base64"
+ "encoding/json"
+ "time"
+
+ "github.com/sorintlab/agola/internal/util"
+)
+
+type SortOrder int
+
+const (
+ SortOrderAsc SortOrder = iota
+ SortOrderDesc
+)
+
+type RunPhase string
+
+const (
+ RunPhaseQueued RunPhase = "queued"
+ RunPhaseCancelled RunPhase = "cancelled"
+ RunPhaseRunning RunPhase = "running"
+ RunPhaseFinished RunPhase = "finished"
+ //RunPhaseSuccess RunPhase = "success"
+ //RunPhaseFailed RunPhase = "failed"
+)
+
+type RunResult string
+
+const (
+ RunResultUnknown RunResult = "unknown"
+ RunResultStopped RunResult = "stopped"
+ RunResultSuccess RunResult = "success"
+ RunResultFailed RunResult = "failed"
+)
+
+func (s RunPhase) IsFinished() bool {
+ return s == RunPhaseCancelled || s == RunPhaseFinished
+}
+
+func (s RunResult) IsSet() bool {
+ return s != RunResultUnknown
+}
+
+func RunPhaseFromStringSlice(slice []string) []RunPhase {
+ rss := make([]RunPhase, len(slice))
+ for i, s := range slice {
+ rss[i] = RunPhase(s)
+ }
+ return rss
+}
+
+// Run is the run status of a RUN. Until the run is finished it'll live in
+// etcd, so we should keep it small to avoid using too much space
+type Run struct {
+ ID string `json:"id,omitempty"`
+ Name string `json:"name,omitempty"`
+
+ Counter uint64 `json:"counter,omitempty"`
+
+ // Group is the run group of the run. Every run is assigned to a specific group
+ // i.e. project/$projectid/$branch
+ // i.e. 
user/$projectid/$branch (for a user run)
+ // this is the format that will be used to archive the runs in the lts. It's
+ // also needed to fetch them when they aren't indexed in the readdb.
+ Group string `json:"group,omitempty"`
+
+ // Annotations contain custom run properties
+ Annotations map[string]string `json:"annotations,omitempty"`
+
+ // Phase represents the current run status. A run could be running but already
+ // marked as failed because some tasks failed. The run will be marked as finished
+ // only when all the executor tasks are known to have really ended. This permits
+ // "at most one" running run per branch/project (useful for example to avoid
+ // multiple concurrent "deploy" tasks that may cause issues)
+ Phase RunPhase `json:"phase,omitempty"`
+
+ // Result of a Run.
+ Result RunResult `json:"result,omitempty"`
+
+ RunTasks map[string]*RunTask `json:"run_tasks,omitempty"`
+ EnqueueTime *time.Time `json:"enqueue_time,omitempty"`
+ StartTime *time.Time `json:"start_time,omitempty"`
+ EndTime *time.Time `json:"end_time,omitempty"`
+
+ Archived bool `json:"archived,omitempty"`
+
+ // internal values not saved
+ Revision int64 `json:"-"`
+}
+
+func (r *Run) ChangePhase(phase RunPhase) {
+ r.Phase = phase
+ switch {
+ case phase == RunPhaseRunning:
+ r.StartTime = util.TimePtr(time.Now())
+ case phase.IsFinished():
+ r.EndTime = util.TimePtr(time.Now())
+ }
+}
+
+func (r *Run) TasksWaitingApproval() []string {
+ runTasksIDs := []string{}
+ for _, rt := range r.RunTasks {
+ if rt.WaitingApproval {
+ runTasksIDs = append(runTasksIDs, rt.ID)
+ }
+ }
+ return runTasksIDs
+}
+
+type RunTaskStatus string
+
+const (
+ RunTaskStatusNotStarted RunTaskStatus = "notstarted"
+ RunTaskStatusCancelled RunTaskStatus = "cancelled"
+ RunTaskStatusRunning RunTaskStatus = "running"
+ RunTaskStatusStopped RunTaskStatus = "stopped"
+ RunTaskStatusSuccess RunTaskStatus = "success"
+ RunTaskStatusFailed RunTaskStatus = "failed"
+)
+
+func (s RunTaskStatus) IsFinished() bool {
+ return s == RunTaskStatusCancelled || s == RunTaskStatusStopped || s == RunTaskStatusSuccess || s == RunTaskStatusFailed
+}
+
+type RunTaskFetchPhase string
+
+const (
+ RunTaskFetchPhaseNotStarted RunTaskFetchPhase = "notstarted"
+ RunTaskFetchPhaseFinished RunTaskFetchPhase = "finished"
+)
+
+type RunTask struct {
+ ID string `json:"id,omitempty"`
+
+ // Status is the current known RunTask status as reported by the executor. So
+ // sometimes it won't be the real status since there may be some already running
+ // executor tasks not yet reported back.
+ // So don't rely on it to know if a runtask is really not running; also check
+ // that there are no executor tasks scheduled
+ Status RunTaskStatus `json:"status,omitempty"`
+
+ WaitingApproval bool `json:"waiting_approval,omitempty"`
+ Approved bool `json:"approved,omitempty"`
+ // ApprovalAnnotations stores data that the user can set on the approval. Useful
+ // to save approval information like the user who approved the task. 
+ // This data is opaque to the run service
+ ApprovalAnnotations map[string]string `json:"approval_annotations,omitempty"`
+
+ Steps []*RunTaskStep `json:"steps,omitempty"`
+
+ // step numbers of workspace archives
+ WorkspaceArchives []int `json:"workspace_archives,omitempty"`
+ WorkspaceArchivesPhase []RunTaskFetchPhase `json:"workspace_archives_phase,omitempty"`
+
+ StartTime *time.Time `json:"start_time,omitempty"`
+ EndTime *time.Time `json:"end_time,omitempty"`
+}
+
+func (rt *RunTask) ArchivesFetchFinished() bool {
+ for _, p := range rt.WorkspaceArchivesPhase {
+ if p == RunTaskFetchPhaseNotStarted {
+ return false
+ }
+ }
+ return true
+}
+
+type RunTaskStep struct {
+ Phase ExecutorTaskPhase `json:"phase,omitempty"`
+
+ // one logphase for every task step
+ LogPhase RunTaskFetchPhase `json:"log_phase,omitempty"`
+
+ StartTime *time.Time `json:"start_time,omitempty"`
+ EndTime *time.Time `json:"end_time,omitempty"`
+}
+
+// RunData
+
+// RunData is the data for a RUN. It contains everything that isn't a state
+// (that's contained in a Run) and that may use a lot of space. It lives in the
+// storage. There is a RunData for every Run.
+type RunData struct {
+ ID string `json:"id,omitempty"`
+
+ // Group is the run group of the run. Every run is assigned to a specific group
+ // i.e. project/$projectid/$branch
+ // i.e. user/$projectid/$branch (for a user run)
+ // this is the format that will be used to archive the runs in the lts. It's
+ // also needed to fetch them when they aren't indexed in the readdb.
+ Group string `json:"group,omitempty"`
+
+ // Environment contains all environment variables that are different between
+ // runs even when using the same RunConfig (like secrets that may change or
+ // user provided environment specific to this run)
+ Environment map[string]string `json:"environment,omitempty"`
+
+ // Annotations contain custom run properties
+ // Note: Annotations are currently both saved in a Run and in RunData to easily return them without loading RunData from the lts
+ Annotations map[string]string `json:"annotations,omitempty"`
+}
+
+// RunConfig
+
+// RunConfig is the run configuration. It lives in the storage. It can be
+// copied (i.e. when we create a new run from a previous run).
+// It could also be shared, but to simplify the run delete logic we just
+// copy it when creating a new run as a modified previous run.
+type RunConfig struct {
+ ID string `json:"id,omitempty"`
+ Name string `json:"name,omitempty"`
+
+ // Environment contains all environment variables that won't change when
+ // generating a new run (like COMMIT_SHA, BRANCH, REPOSITORY_URL etc...) 
+ Environment map[string]string `json:"environment,omitempty"` + + Tasks map[string]*RunConfigTask `json:"tasks,omitempty"` +} + +type RunConfigTask struct { + Level int `json:"level,omitempty"` + ID string `json:"id,omitempty"` + Name string `json:"name,omitempty"` + Depends []*RunConfigTaskDepend `json:"depends"` + Runtime *Runtime `json:"runtime,omitempty"` + Environment map[string]string `json:"environment,omitempty"` + WorkingDir string `json:"working_dir,omitempty"` + Shell string `json:"shell,omitempty"` + User string `json:"user,omitempty"` + Steps []interface{} `json:"steps,omitempty"` + IgnoreFailure bool `json:"ignore_failure,omitempty"` + NeedsApproval bool `json:"needs_approval,omitempty"` +} + +type RunConfigTaskDependCondition string + +const ( + RunConfigTaskDependConditionOnSuccess RunConfigTaskDependCondition = "on_success" + RunConfigTaskDependConditionOnFailure RunConfigTaskDependCondition = "on_failure" +) + +type RunConfigTaskDepend struct { + TaskID string `json:"task_id,omitempty"` + Conditions []RunConfigTaskDependCondition `json:"conditions,omitempty"` +} + +type RuntimeType string + +const ( + RuntimeTypePod RuntimeType = "pod" +) + +type Runtime struct { + Type RuntimeType `json:"type,omitempty"` + Containers []*Container `json:"containers,omitempty"` +} + +func (rct *RunConfigTask) UnmarshalJSON(b []byte) error { + type rctask RunConfigTask + + type task struct { + Steps []json.RawMessage `json:"steps,omitempty"` + } + + rctt := (*rctask)(rct) + if err := json.Unmarshal(b, &rctt); err != nil { + return err + } + + var st task + if err := json.Unmarshal(b, &st); err != nil { + return err + } + + steps := make([]interface{}, len(st.Steps)) + for i, s := range st.Steps { + var bs Step + if err := json.Unmarshal(s, &bs); err != nil { + return err + } + switch bs.Type { + case "run": + var rs RunStep + if err := json.Unmarshal(s, &rs); err != nil { + return err + } + steps[i] = &rs + case "save_to_workspace": + var rs SaveToWorkspaceStep + if err := json.Unmarshal(s, &rs); err != nil { + return err + } + steps[i] = &rs + case "restore_workspace": + var rs RestoreWorkspaceStep + if err := json.Unmarshal(s, &rs); err != nil { + return err + } + steps[i] = &rs + } + } + + rct.Steps = steps + + return nil +} + +type Step struct { + Type string `json:"type,omitempty"` + Name string `json:"name,omitempty"` +} + +type RunStep struct { + Step + Command string `json:"command,omitempty"` + Environment map[string]string `json:"environment,omitempty"` + WorkingDir string `json:"working_dir,omitempty"` + Shell string `json:"shell,omitempty"` + User string `json:"user,omitempty"` +} + +type SaveToWorkspaceContent struct { + SourceDir string `json:"source_dir,omitempty"` + DestDir string `json:"dest_dir,omitempty"` + Paths []string `json:"paths,omitempty"` +} + +type SaveToWorkspaceStep struct { + Step + Contents []SaveToWorkspaceContent `json:"contents,omitempty"` +} + +type RestoreWorkspaceStep struct { + Step + DestDir string `json:"dest_dir,omitempty"` +} + +type ExecutorTaskPhase string + +const ( + ExecutorTaskPhaseNotStarted ExecutorTaskPhase = "notstarted" + ExecutorTaskPhaseCancelled ExecutorTaskPhase = "cancelled" + ExecutorTaskPhaseRunning ExecutorTaskPhase = "running" + ExecutorTaskPhaseStopped ExecutorTaskPhase = "stopped" + ExecutorTaskPhaseSuccess ExecutorTaskPhase = "success" + ExecutorTaskPhaseFailed ExecutorTaskPhase = "failed" +) + +func (s ExecutorTaskPhase) IsFinished() bool { + return s == ExecutorTaskPhaseCancelled || s == ExecutorTaskPhaseStopped || s == 
ExecutorTaskPhaseSuccess || s == ExecutorTaskPhaseFailed
+}
+
+type ExecutorTask struct {
+ Revision int64 `json:"revision,omitempty"`
+ ID string `json:"id,omitempty"`
+ RunID string `json:"run_id,omitempty"`
+ TaskName string `json:"task_name,omitempty"`
+ Containers []*Container `json:"containers,omitempty"`
+ Environment map[string]string `json:"environment,omitempty"`
+ WorkingDir string `json:"working_dir,omitempty"`
+ Shell string `json:"shell,omitempty"`
+ User string `json:"user,omitempty"`
+ Steps []interface{} `json:"steps,omitempty"`
+
+ Status ExecutorTaskStatus `json:"status,omitempty"`
+ SetupError string `json:"setup_error,omitempty"`
+ FailError string `json:"fail_error,omitempty"`
+
+ Workspace Workspace `json:"workspace,omitempty"`
+
+ // Stop is used to signal from the scheduler when the task must be stopped
+ Stop bool `json:"stop,omitempty"`
+}
+
+type ExecutorTaskStatus struct {
+ ExecutorID string `json:"executor_id,omitempty"`
+ Phase ExecutorTaskPhase `json:"phase,omitempty"`
+
+ Steps []*ExecutorTaskStepStatus `json:"steps,omitempty"`
+
+ StartTime *time.Time `json:"start_time,omitempty"`
+ EndTime *time.Time `json:"end_time,omitempty"`
+}
+
+type ExecutorTaskStepStatus struct {
+ Phase ExecutorTaskPhase `json:"phase,omitempty"`
+
+ StartTime *time.Time `json:"start_time,omitempty"`
+ EndTime *time.Time `json:"end_time,omitempty"`
+
+ ExitCode int `json:"exit_code,omitempty"`
+}
+
+type Container struct {
+ Image string `json:"image,omitempty"`
+ Environment map[string]string `json:"environment,omitempty"`
+ User string `json:"user,omitempty"`
+}
+
+type Workspace []WorkspaceLevel
+
+type WorkspaceLevel []WorkspaceArchives
+
+type WorkspaceArchives []WorkspaceArchive
+
+type WorkspaceArchive struct {
+ TaskID string `json:"task_id,omitempty"`
+ Step int `json:"step,omitempty"`
+}
+
+func (et *ExecutorTask) UnmarshalJSON(b []byte) error {
+ type etask ExecutorTask
+
+ type task struct {
+ Steps []json.RawMessage `json:"steps,omitempty"`
+ }
+
+ ett := (*etask)(et)
+ if err := json.Unmarshal(b, &ett); err != nil {
+ return err
+ }
+
+ var st task
+ if err := json.Unmarshal(b, &st); err != nil {
+ return err
+ }
+
+ steps := make([]interface{}, len(st.Steps))
+ for i, s := range st.Steps {
+ var bs Step
+ if err := json.Unmarshal(s, &bs); err != nil {
+ return err
+ }
+ switch bs.Type {
+ case "run":
+ var rs RunStep
+ if err := json.Unmarshal(s, &rs); err != nil {
+ return err
+ }
+ steps[i] = &rs
+ case "save_to_workspace":
+ var rs SaveToWorkspaceStep
+ if err := json.Unmarshal(s, &rs); err != nil {
+ return err
+ }
+ steps[i] = &rs
+ case "restore_workspace":
+ var rs RestoreWorkspaceStep
+ if err := json.Unmarshal(s, &rs); err != nil {
+ return err
+ }
+ steps[i] = &rs
+ }
+ }
+
+ et.Steps = steps
+
+ return nil
+}
+
+type ChangeGroupsUpdateToken struct {
+ CurRevision int64 `json:"cur_revision"`
+ ChangeGroupsRevisions ChangeGroupsRevisions `json:"change_groups_revisions"`
+}
+
+type ChangeGroupsRevisions map[string]int64
+
+func MarshalChangeGroupsUpdateToken(t *ChangeGroupsUpdateToken) (string, error) {
+ tj, err := json.Marshal(t)
+ if err != nil {
+ return "", err
+ }
+ return base64.StdEncoding.EncodeToString(tj), nil
+}
+
+func UnmarshalChangeGroupsUpdateToken(s string) (*ChangeGroupsUpdateToken, error) {
+ if s == "" {
+ return nil, nil
+ }
+
+ tj, err := base64.StdEncoding.DecodeString(s)
+ if err != nil {
+ return nil, err
+ }
+ var t *ChangeGroupsUpdateToken
+ if err := json.Unmarshal(tj, &t); err != nil {
+ return nil, err
+ }
+ 
return t, nil
+}
+
+type Executor struct {
+ // ID is the Executor unique id
+ ID string `json:"id,omitempty"`
+ ListenURL string `json:"listenURL,omitempty"`
+
+ // internal values not saved
+ Revision int64 `json:"-"`
+}
diff --git a/internal/util/path.go b/internal/util/path.go
new file mode 100644
index 0000000..c352e96
--- /dev/null
+++ b/internal/util/path.go
@@ -0,0 +1,33 @@
+// Copyright 2019 Sorint.lab
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package util
+
+import "path"
+
+// PathHierarchy returns a slice of paths from the base path (root included as . or / ).
+// I.E. for a path like "path/to/file" it'll return a slice of these elements:
+// ".", "path", "path/to", "path/to/file"
+func PathHierarchy(p string) []string {
+ paths := []string{}
+ for {
+ paths = append([]string{p}, paths...)
+ prevp := p
+ p = path.Dir(p)
+ if p == prevp {
+ break
+ }
+ }
+ return paths
+}
diff --git a/internal/util/sha.go b/internal/util/sha.go
new file mode 100644
index 0000000..16788bb
--- /dev/null
+++ b/internal/util/sha.go
@@ -0,0 +1,37 @@
+// Copyright 2019 Sorint.lab
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package util
+
+import (
+ "crypto/sha1"
+ "crypto/sha256"
+ "encoding/hex"
+)
+
+// EncodeSha1Hex generates a sha1 from the string and returns its hex encoding
+func EncodeSha1Hex(str string) string {
+ h := sha1.New()
+ // TODO(sgotti) must handle write errors
+ h.Write([]byte(str))
+ return hex.EncodeToString(h.Sum(nil))
+}
+
+// EncodeSha256Hex generates a sha256 from the string and returns its hex encoding
+func EncodeSha256Hex(str string) string {
+ h := sha256.New()
+ // TODO(sgotti) must handle write errors
+ h.Write([]byte(str))
+ return hex.EncodeToString(h.Sum(nil))
+}