* Create an APIError that should only be used for API returned errors. It wraps an error and can have different Kinds plus an optional code and message.
* The http handlers will use the first APIError available in the error chain and generate a json response body containing the code and the user message. The wrapped error is internal and is not sent in the response. If no APIError is available in the chain, a generic internal server error will be returned (see the sketch after this list).
* Add a RemoteError type that will be created from remote services calls (runservice, configstore). It's similar to the APIError but is a different type so it won't propagate to the caller response, and it doesn't contain any wrapped error.
* Gateway: when we call a remote service, by default, we'll create an APIError using the RemoteError Kind (omitting the code and the message, which usually must not be propagated). This is done for all the remote service calls as a starting point; in the future, if this default behavior is not the right one for a specific remote service call, a new APIError with a different kind and/or augmented with the calling service error codes and user messages could be created.
* datamanager: use a dedicated ErrNotExist (converting the objectstorage ErrNotExist).
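None of the new error types appear in the file below; as a rough illustration of the intended shape, here is a minimal sketch. All names in it (`ErrorKind`, `ErrRemote`, `writeAPIError`, the `util` package) are assumptions for illustration, not taken from the actual code:

```go
// Illustrative sketch only: type and function names here are assumptions,
// not the actual agola implementation.
package util

import (
	"encoding/json"
	"net/http"

	errors "golang.org/x/xerrors"
)

// ErrorKind classifies an APIError.
type ErrorKind int

const (
	ErrInternal ErrorKind = iota
	ErrBadRequest
	ErrNotFound
	ErrRemote
)

// APIError wraps an internal error and carries the kind, code and user
// message that may be exposed in API responses.
type APIError struct {
	err     error
	Kind    ErrorKind
	Code    string
	Message string
}

func NewAPIError(kind ErrorKind, err error) *APIError {
	return &APIError{Kind: kind, err: err}
}

func (e *APIError) Error() string { return e.err.Error() }
func (e *APIError) Unwrap() error { return e.err }

// writeAPIError builds the json response from the first APIError found in the
// error chain; the wrapped error stays internal. Without an APIError in the
// chain a generic internal server error is returned.
func writeAPIError(w http.ResponseWriter, err error) {
	var apiErr *APIError
	if !errors.As(err, &apiErr) {
		w.WriteHeader(http.StatusInternalServerError)
		_ = json.NewEncoder(w).Encode(map[string]string{"message": "internal server error"})
		return
	}

	status := http.StatusInternalServerError
	switch apiErr.Kind {
	case ErrBadRequest:
		status = http.StatusBadRequest
	case ErrNotFound:
		status = http.StatusNotFound
	}

	w.WriteHeader(status)
	_ = json.NewEncoder(w).Encode(map[string]string{
		"code":    apiErr.Code,
		"message": apiErr.Message,
	})
}
```

Under the same assumptions, a gateway call to a remote service would by default wrap the returned remote error as `NewAPIError(ErrRemote, err)`, leaving the remote code and message out unless a specific call opts into a richer kind.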
// Copyright 2019 Sorint.lab
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
// See the License for the specific language governing permissions and
// limitations under the License.

package datamanager

import (
	"context"
	"fmt"
	"path"
	"strings"
	"time"

	"agola.io/agola/internal/etcd"
	"agola.io/agola/internal/objectstorage"
	"agola.io/agola/internal/sequence"

	"go.uber.org/zap"
	errors "golang.org/x/xerrors"
)

// TODO(sgotti) handle etcd unwanted changes:
// * Etcd cluster rebuild: we cannot rely on etcd header ClusterID since it could be the same as it's generated using the listen urls. We should add our own clusterid key and use it.
// * Etcd cluster restored to a previous revision: really bad cause should detect that the revision is smaller than the current one

const (
	DefaultSyncInterval                = 5 * time.Second
	DefaultCheckpointInterval          = 10 * time.Second
	DefaultCheckpointCleanInterval     = 5 * time.Minute
	DefaultEtcdWalCleanInterval        = 2 * time.Second
	DefaultStorageWalCleanInterval     = 5 * time.Minute
	DefaultCompactChangeGroupsInterval = 1 * time.Second
	DefaultEtcdPingerInterval          = 1 * time.Second
	DefaultEtcdWalsKeepNum             = 100
	DefaultMinCheckpointWalsNum        = 100
)

var (
	ErrCompacted   = errors.New("required revision has been compacted")
	ErrConcurrency = errors.New("wal concurrency error: change groups already updated")
)

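// ErrNotExist is a dedicated error returned when a requested object doesn't
// exist (objectstorage not exist errors are converted to it). It wraps the
// original error.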
type ErrNotExist struct {
	err error
}

func newErrNotExist(err error) error {
	return &ErrNotExist{err: err}
}

func (e *ErrNotExist) Error() string {
	return e.err.Error()
}

func (e *ErrNotExist) Unwrap() error {
	return e.err
}

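// IsNotExist reports whether any error in err's chain is an *ErrNotExist.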
func IsNotExist(err error) bool {
	var e *ErrNotExist
	return errors.As(err, &e)
}

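// fromOSTError converts an objectstorage error, mapping the objectstorage
// not exist error to a datamanager ErrNotExist.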
func fromOSTError(err error) error {
	if objectstorage.IsNotExist(err) {
		return newErrNotExist(err)
	}

	return err
}

var (
	// Storage paths. Always use path (not filepath) to use the "/" separator
	storageDataDir = "data"

	storageWalsDir       = "wals"
	storageWalsStatusDir = path.Join(storageWalsDir, "status")
	storageWalsDataDir   = path.Join(storageWalsDir, "data")

	// etcd paths. Always use path (not filepath) to use the "/" separator
	etcdWalBaseDir                    = "datamanager"
	etcdWalsDir                       = path.Join(etcdWalBaseDir, "wals")
	etcdWalsDataKey                   = path.Join(etcdWalBaseDir, "walsdata")
	etcdWalSeqKey                     = path.Join(etcdWalBaseDir, "walseq")
	etcdLastCommittedStorageWalSeqKey = path.Join(etcdWalBaseDir, "lastcommittedstoragewalseq")
	etcdCheckpointSeqKey              = path.Join(etcdWalBaseDir, "checkpointseq")

	etcdInitEtcdLockKey            = path.Join(etcdWalBaseDir, "initetcd")
	etcdSyncLockKey                = path.Join(etcdWalBaseDir, "synclock")
	etcdCompactChangeGroupsLockKey = path.Join(etcdWalBaseDir, "compactchangegroupslock")
	etcdCheckpointLockKey          = path.Join(etcdWalBaseDir, "checkpointlock")
	etcdWalCleanerLockKey          = path.Join(etcdWalBaseDir, "walcleanerlock")
	etcdStorageWalCleanerLockKey   = path.Join(etcdWalBaseDir, "storagewalcleanerlock")

	etcdChangeGroupsDir           = path.Join(etcdWalBaseDir, "changegroups")
	etcdChangeGroupMinRevisionKey = path.Join(etcdWalBaseDir, "changegroupsminrev")

	etcdPingKey = path.Join(etcdWalBaseDir, "ping")
)

const (
	etcdChangeGroupMinRevisionRange = 1000

	maxChangegroupNameLength = 256
)

type DataManagerConfig struct {
	BasePath                string
	E                       *etcd.Store
	OST                     *objectstorage.ObjStorage
	DataTypes               []string
	EtcdWalsKeepNum         int
	CheckpointInterval      time.Duration
	CheckpointCleanInterval time.Duration
	// MinCheckpointWalsNum is the minimum number of wals required before doing a checkpoint
	MinCheckpointWalsNum int
	MaxDataFileSize      int64
	MaintenanceMode      bool
}

type DataManager struct {
	basePath                string
	log                     *zap.SugaredLogger
	e                       *etcd.Store
	ost                     *objectstorage.ObjStorage
	changes                 *WalChanges
	dataTypes               []string
	etcdWalsKeepNum         int
	checkpointInterval      time.Duration
	checkpointCleanInterval time.Duration
	minCheckpointWalsNum    int
	maxDataFileSize         int64
	maintenanceMode         bool
}

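// NewDataManager creates a DataManager from the provided config, applying
// defaults to unset values and validating them.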
func NewDataManager(ctx context.Context, logger *zap.Logger, conf *DataManagerConfig) (*DataManager, error) {
	if conf.EtcdWalsKeepNum == 0 {
		conf.EtcdWalsKeepNum = DefaultEtcdWalsKeepNum
	}
	if conf.EtcdWalsKeepNum < 1 {
		return nil, errors.New("etcdWalsKeepNum must be greater than 0")
	}
	if conf.CheckpointInterval == 0 {
		conf.CheckpointInterval = DefaultCheckpointInterval
	}
	if conf.CheckpointCleanInterval == 0 {
		conf.CheckpointCleanInterval = DefaultCheckpointCleanInterval
	}
	if conf.MinCheckpointWalsNum == 0 {
		conf.MinCheckpointWalsNum = DefaultMinCheckpointWalsNum
	}
	if conf.MinCheckpointWalsNum < 1 {
		return nil, errors.New("minCheckpointWalsNum must be greater than 0")
	}
	if conf.MaxDataFileSize == 0 {
		conf.MaxDataFileSize = DefaultMaxDataFileSize
	}

	d := &DataManager{
		basePath:                conf.BasePath,
		log:                     logger.Sugar(),
		e:                       conf.E,
		ost:                     conf.OST,
		changes:                 NewWalChanges(conf.DataTypes),
		dataTypes:               conf.DataTypes,
		etcdWalsKeepNum:         conf.EtcdWalsKeepNum,
		checkpointInterval:      conf.CheckpointInterval,
		checkpointCleanInterval: conf.CheckpointCleanInterval,
		minCheckpointWalsNum:    conf.MinCheckpointWalsNum,
		maxDataFileSize:         conf.MaxDataFileSize,
		maintenanceMode:         conf.MaintenanceMode,
	}

	// add a trailing slash to the basepath
	if d.basePath != "" && !strings.HasSuffix(d.basePath, "/") {
		d.basePath = d.basePath + "/"
	}

	return d, nil
}

func (d *DataManager) storageWalStatusDir() string {
	return path.Join(d.basePath, storageWalsStatusDir)
}

func (d *DataManager) storageWalStatusFile(walSeq string) string {
	return path.Join(d.storageWalStatusDir(), walSeq)
}

func (d *DataManager) storageWalDataDir() string {
	return path.Join(d.basePath, storageWalsDataDir)
}

func (d *DataManager) storageWalDataFile(walFileID string) string {
	return path.Join(d.storageWalDataDir(), walFileID)
}

func (d *DataManager) storageDataDir() string {
	return path.Join(d.basePath, storageDataDir)
}

func (d *DataManager) dataStatusPath(sequence *sequence.Sequence) string {
	return fmt.Sprintf("%s/%s.status", d.storageDataDir(), sequence)
}

func (d *DataManager) DataTypeDir(dataType string) string {
	return fmt.Sprintf("%s/%s", d.storageDataDir(), dataType)
}

func (d *DataManager) DataFileBasePath(dataType, name string) string {
	return fmt.Sprintf("%s/%s", d.DataTypeDir(dataType), name)
}

func (d *DataManager) DataFileIndexPath(dataType, name string) string {
	return fmt.Sprintf("%s.index", d.DataFileBasePath(dataType, name))
}

func (d *DataManager) DataFilePath(dataType, name string) string {
	return fmt.Sprintf("%s.data", d.DataFileBasePath(dataType, name))
}

func etcdWalKey(walSeq string) string {
	return path.Join(etcdWalsDir, walSeq)
}

// SetMaintenanceMode sets the datamanager in maintenance mode. This method must
// be called before invoking the Run method
func (d *DataManager) SetMaintenanceMode(maintenanceMode bool) {
	d.maintenanceMode = maintenanceMode
}

// deleteEtcd deletes all etcd data excluding keys used for locking
func (d *DataManager) deleteEtcd(ctx context.Context) error {
	prefixes := []string{
		etcdWalsDir + "/",
		etcdWalsDataKey,
		etcdWalSeqKey,
		etcdLastCommittedStorageWalSeqKey,
		etcdCheckpointSeqKey,
		etcdChangeGroupsDir + "/",
		etcdChangeGroupMinRevisionKey,
	}
	for _, prefix := range prefixes {
		if err := d.e.DeletePrefix(ctx, prefix); err != nil {
			return err
		}
	}

	return nil
}

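// Run initializes etcd (retrying until it succeeds or the context is done),
// signals readiness on readyCh and starts the sync, checkpoint, cleaner and
// pinger loops; in maintenance mode only readiness is signaled. It blocks
// until the context is done.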
func (d *DataManager) Run(ctx context.Context, readyCh chan struct{}) error {
	if !d.maintenanceMode {
		for {
			err := d.InitEtcd(ctx, nil)
			if err == nil {
				break
			}
			d.log.Errorf("failed to initialize etcd: %+v", err)

			sleepCh := time.NewTimer(1 * time.Second).C
			select {
			case <-ctx.Done():
				return nil
			case <-sleepCh:
			}
		}

		readyCh <- struct{}{}

		go d.watcherLoop(ctx)
		go d.syncLoop(ctx)
		go d.checkpointLoop(ctx)
		go d.checkpointCleanLoop(ctx)
		go d.etcdWalCleanerLoop(ctx)
		go d.storageWalCleanerLoop(ctx)
		go d.compactChangeGroupsLoop(ctx)
		go d.etcdPingerLoop(ctx)

	} else {
		d.log.Infof("datamanager starting in maintenance mode")
		readyCh <- struct{}{}
	}

	<-ctx.Done()
	d.log.Infof("datamanager exiting")

	return nil
}