From 87c04109146a0956261f7caa4af28167c406cc6d Mon Sep 17 00:00:00 2001
From: Andrey Meshkov <am@adguard.com>
Date: Fri, 21 Feb 2020 12:57:12 +0300
Subject: [PATCH] -(dnsforward): start using lastProbeLineIdx in the Seek

---
 querylog/qlog_file.go      | 23 ++++++++++++++---------
 querylog/qlog_file_test.go | 31 ++++++++++++++++++++++++++-----
 querylog/qlog_reader.go    |  2 +-
 3 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/querylog/qlog_file.go b/querylog/qlog_file.go
index 24c5a4e2..54d1896c 100644
--- a/querylog/qlog_file.go
+++ b/querylog/qlog_file.go
@@ -59,10 +59,12 @@ func NewQLogFile(path string) (*QLogFile, error) {
 // it shifts seek position to 3/4 of the file. Otherwise, to 1/4 of the file.
 // 5. It performs the search again, every time the search scope is narrowed twice.
 //
-// It returns the position of the the line with the timestamp we were looking for
+// Returns:
+// * It returns the position of the the line with the timestamp we were looking for
 // so that when we call "ReadNext" this line was returned.
-// If we could not find it, it returns 0 and ErrSeekNotFound
-func (q *QLogFile) Seek(timestamp int64) (int64, error) {
+// * Depth of the search (how many times we compared timestamps).
+// * If we could not find it, it returns ErrSeekNotFound
+func (q *QLogFile) Seek(timestamp int64) (int64, int, error) {
 	q.lock.Lock()
 	defer q.lock.Unlock()
 
@@ -72,7 +74,7 @@ func (q *QLogFile) Seek(timestamp int64) (int64, error) {
 	// First of all, check the file size
 	fileInfo, err := q.file.Stat()
 	if err != nil {
-		return 0, err
+		return 0, 0, err
 	}
 
 	// Define the search scope
@@ -91,14 +93,14 @@ func (q *QLogFile) Seek(timestamp int64) (int64, error) {
 		// Get the line at the specified position
 		line, lineIdx, err = q.readProbeLine(probe)
 		if err != nil {
-			return 0, err
+			return 0, depth, err
 		}
 
 		// Get the timestamp from the query log record
 		ts := readQLogTimestamp(line)
 
 		if ts == 0 {
-			return 0, ErrSeekNotFound
+			return 0, depth, ErrSeekNotFound
 		}
 
 		if ts == timestamp {
@@ -109,9 +111,12 @@ func (q *QLogFile) Seek(timestamp int64) (int64, error) {
 		if lastProbeLineIdx == lineIdx {
 			// If we're testing the same line twice then most likely
 			// the scope is too narrow and we won't find anything anymore
-			return 0, ErrSeekNotFound
+			return 0, depth, ErrSeekNotFound
 		}
 
+		// Save the last found idx
+		lastProbeLineIdx = lineIdx
+
 		// Narrow the scope and repeat the search
 		if ts > timestamp {
 			// If the timestamp we're looking for is OLDER than what we found
@@ -128,12 +133,12 @@ func (q *QLogFile) Seek(timestamp int64) (int64, error) {
 		depth++
 		if depth >= 100 {
 			log.Error("Seek depth is too high, aborting. File %s, ts %v", q.file.Name(), timestamp)
-			return 0, ErrSeekNotFound
+			return 0, depth, ErrSeekNotFound
 		}
 	}
 
 	q.position = lineIdx + int64(len(line))
-	return q.position, nil
+	return q.position, depth, nil
 }
 
 // SeekStart changes the current position to the end of the file
diff --git a/querylog/qlog_file_test.go b/querylog/qlog_file_test.go
index c5871475..a0fa07f1 100644
--- a/querylog/qlog_file_test.go
+++ b/querylog/qlog_file_test.go
@@ -4,6 +4,7 @@ import (
 	"encoding/binary"
 	"io"
 	"io/ioutil"
+	"math"
 	"net"
 	"os"
 	"strings"
@@ -95,13 +96,23 @@ func TestQLogFileSeekLargeFile(t *testing.T) {
 	testSeekLineQLogFile(t, q, count)
 
 	// CASE 5: Seek non-existent (too low)
-	_, err = q.Seek(123)
+	_, _, err = q.Seek(123)
 	assert.NotNil(t, err)
 
 	// CASE 6: Seek non-existent (too high)
 	ts, _ := time.Parse(time.RFC3339, "2100-01-02T15:04:05Z07:00")
-	_, err = q.Seek(ts.UnixNano())
+	_, _, err = q.Seek(ts.UnixNano())
 	assert.NotNil(t, err)
+
+	// CASE 7: "Almost" found
+	line, err := getQLogFileLine(q, count/2)
+	assert.Nil(t, err)
+	// ALMOST the record we need
+	timestamp := readQLogTimestamp(line) - 1
+	assert.NotEqual(t, uint64(0), timestamp)
+	_, depth, err := q.Seek(timestamp)
+	assert.NotNil(t, err)
+	assert.True(t, depth <= int(math.Log2(float64(count))+3))
 }
 
 func TestQLogFileSeekSmallFile(t *testing.T) {
@@ -131,13 +142,23 @@ func TestQLogFileSeekSmallFile(t *testing.T) {
 	testSeekLineQLogFile(t, q, count)
 
 	// CASE 5: Seek non-existent (too low)
-	_, err = q.Seek(123)
+	_, _, err = q.Seek(123)
 	assert.NotNil(t, err)
 
 	// CASE 6: Seek non-existent (too high)
 	ts, _ := time.Parse(time.RFC3339, "2100-01-02T15:04:05Z07:00")
-	_, err = q.Seek(ts.UnixNano())
+	_, _, err = q.Seek(ts.UnixNano())
 	assert.NotNil(t, err)
+
+	// CASE 7: "Almost" found
+	line, err := getQLogFileLine(q, count/2)
+	assert.Nil(t, err)
+	// ALMOST the record we need
+	timestamp := readQLogTimestamp(line) - 1
+	assert.NotEqual(t, uint64(0), timestamp)
+	_, depth, err := q.Seek(timestamp)
+	assert.NotNil(t, err)
+	assert.True(t, depth <= int(math.Log2(float64(count))+3))
 }
 
 func testSeekLineQLogFile(t *testing.T, q *QLogFile, lineNumber int) {
@@ -147,7 +168,7 @@ func testSeekLineQLogFile(t *testing.T, q *QLogFile, lineNumber int) {
 	assert.NotEqual(t, uint64(0), ts)
 
 	// try seeking to that line now
-	pos, err := q.Seek(ts)
+	pos, _, err := q.Seek(ts)
 	assert.Nil(t, err)
 	assert.NotEqual(t, int64(0), pos)
 
diff --git a/querylog/qlog_reader.go b/querylog/qlog_reader.go
index 5f4f4dfa..ee2f617d 100644
--- a/querylog/qlog_reader.go
+++ b/querylog/qlog_reader.go
@@ -51,7 +51,7 @@ func NewQLogReader(files []string) (*QLogReader, error) {
 func (r *QLogReader) Seek(timestamp int64) error {
 	for i := len(r.qFiles) - 1; i >= 0; i-- {
 		q := r.qFiles[i]
-		_, err := q.Seek(timestamp)
+		_, _, err := q.Seek(timestamp)
 		if err == nil {
 			// Our search is finished, we found the element we were looking for
 			// Update currentFile only, position is already set properly in the QLogFile