From ce306d2a4b48cb219793eb7f25dcdb62ae8b97d8 Mon Sep 17 00:00:00 2001 From: Azareal Date: Sat, 7 Mar 2020 13:25:50 +1000 Subject: [PATCH] add slurp, exabot, sogou and alexa user agents try again to add aspiegelbot --- gen_router.go | 95 ++++++++++++++++++++++++++-------------------- langs/english.json | 4 ++ router_gen/main.go | 13 +++++++ 3 files changed, 71 insertions(+), 41 deletions(-) diff --git a/gen_router.go b/gen_router.go index 5452e2d8..0aae6bd4 100644 --- a/gen_router.go +++ b/gen_router.go @@ -586,25 +586,29 @@ var agentMapEnum = map[string]int{ "googlebot": 12, "yandex": 13, "bing": 14, - "baidu": 15, - "duckduckgo": 16, - "seznambot": 17, - "discord": 18, - "twitter": 19, - "facebook": 20, - "cloudflare": 21, - "uptimebot": 22, - "slackbot": 23, - "apple": 24, - "discourse": 25, - "lynx": 26, - "blank": 27, - "malformed": 28, - "suspicious": 29, - "semrush": 30, - "dotbot": 31, - "aspiegel": 32, - "zgrab": 33, + "slurp": 15, + "exabot": 16, + "baidu": 17, + "sogou": 18, + "duckduckgo": 19, + "seznambot": 20, + "discord": 21, + "twitter": 22, + "facebook": 23, + "cloudflare": 24, + "uptimebot": 25, + "slackbot": 26, + "apple": 27, + "discourse": 28, + "alexa": 29, + "lynx": 30, + "blank": 31, + "malformed": 32, + "suspicious": 33, + "semrush": 34, + "dotbot": 35, + "aspiegel": 36, + "zgrab": 37, } var reverseAgentMapEnum = map[int]string{ 0: "unknown", @@ -622,25 +626,29 @@ var reverseAgentMapEnum = map[int]string{ 12: "googlebot", 13: "yandex", 14: "bing", - 15: "baidu", - 16: "duckduckgo", - 17: "seznambot", - 18: "discord", - 19: "twitter", - 20: "facebook", - 21: "cloudflare", - 22: "uptimebot", - 23: "slackbot", - 24: "apple", - 25: "discourse", - 26: "lynx", - 27: "blank", - 28: "malformed", - 29: "suspicious", - 30: "semrush", - 31: "dotbot", - 32: "aspiegel", - 33: "zgrab", + 15: "slurp", + 16: "exabot", + 17: "baidu", + 18: "sogou", + 19: "duckduckgo", + 20: "seznambot", + 21: "discord", + 22: "twitter", + 23: "facebook", + 24: "cloudflare", + 25: "uptimebot", + 26: "slackbot", + 27: "apple", + 28: "discourse", + 29: "alexa", + 30: "lynx", + 31: "blank", + 32: "malformed", + 33: "suspicious", + 34: "semrush", + 35: "dotbot", + 36: "aspiegel", + 37: "zgrab", } var markToAgent = map[string]string{ "OPR": "opera", @@ -657,8 +665,11 @@ var markToAgent = map[string]string{ "yandex": "yandex", "DuckDuckBot": "duckduckgo", "Baiduspider": "baidu", + "Sogou": "sogou", "bingbot": "bing", "BingPreview": "bing", + "Slurp": "slurp", + "Exabot": "exabot", "SeznamBot": "seznambot", "CloudFlare": "cloudflare", "Uptimebot": "uptimebot", @@ -669,8 +680,10 @@ var markToAgent = map[string]string{ "Facebot": "facebook", "Applebot": "apple", "Discourse": "discourse", + "ia_archiver": "alexa", "SemrushBot": "semrush", "DotBot": "dotbot", + "AspiegelBot": "aspiegel", "zgrab": "zgrab", } /*var agentRank = map[string]int{ @@ -796,7 +809,7 @@ func (r *GenRouter) SuspiciousRequest(req *http.Request, prepend string) { prepend += "\n" } r.DumpRequest(req,prepend+"Suspicious Request") - co.AgentViewCounter.Bump(29) + co.AgentViewCounter.Bump(33) } func isLocalHost(h string) bool { @@ -811,7 +824,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) { w.WriteHeader(200) // 400 w.Write([]byte("")) r.DumpRequest(req,"Malformed Request T"+strconv.Itoa(typ)) - co.AgentViewCounter.Bump(28) + co.AgentViewCounter.Bump(32) } // Split the Host and Port string @@ -953,7 +966,7 @@ func (r *GenRouter) ServeHTTP(w http.ResponseWriter, req *http.Request) { ua := strings.TrimSpace(strings.Replace(strings.TrimPrefix(req.UserAgent(),"Mozilla/5.0 ")," Safari/537.36","",-1)) // Noise, no one's going to be running this and it would require some sort of agent ranking system to determine which identifier should be prioritised over another if ua == "" { - co.AgentViewCounter.Bump(27) + co.AgentViewCounter.Bump(31) if c.Dev.DebugMode { var prepend string for _, char := range req.UserAgent() { diff --git a/langs/english.json b/langs/english.json index 195f7c40..6abf5d96 100644 --- a/langs/english.json +++ b/langs/english.json @@ -200,6 +200,9 @@ "googlebot":"Googlebot", "yandex":"Yandex", "bing":"Bing", + "slurp":"Yahoo! Slurp", + "exabot":"Exabot", + "sogou":"Sogou", "baidu":"Baidu", "duckduckgo":"DuckDuckBot", "seznambot":"SeznamBot", @@ -211,6 +214,7 @@ "facebook":"FacebookBot", "apple":"AppleBot", "discourse":"Discourse Forum Onebox", + "alexa":"Alexa", "lynx":"Lynx", "semrush":"SemrushBot", diff --git a/router_gen/main.go b/router_gen/main.go index a2df21fb..28327e61 100644 --- a/router_gen/main.go +++ b/router_gen/main.go @@ -245,7 +245,10 @@ func main() { "googlebot", "yandex", "bing", + "slurp", + "exabot", "baidu", + "sogou", "duckduckgo", "seznambot", "discord", @@ -256,6 +259,7 @@ func main() { "slackbot", "apple", "discourse", + "alexa", "lynx", "blank", "malformed", @@ -287,8 +291,11 @@ func main() { "yandex", "DuckDuckBot", "Baiduspider", + "Sogou", "bingbot", "BingPreview", + "Slurp", + "Exabot", "SeznamBot", "CloudFlare", "Uptimebot", @@ -299,9 +306,11 @@ func main() { "Facebot", "Applebot", "Discourse", + "ia_archiver", "SemrushBot", "DotBot", + "AspiegelBot", "zgrab", } @@ -321,8 +330,11 @@ func main() { "yandex": "yandex", // from the URL "DuckDuckBot": "duckduckgo", "Baiduspider": "baidu", + "Sogou": "sogou", "bingbot": "bing", "BingPreview": "bing", + "Slurp": "slurp", + "Exabot": "exabot", "SeznamBot": "seznambot", "CloudFlare": "cloudflare", // Track alwayson specifically in case there are other bots? "Uptimebot": "uptimebot", @@ -333,6 +345,7 @@ func main() { "Facebot": "facebook", "Applebot": "apple", "Discourse": "discourse", + "ia_archiver": "alexa", "SemrushBot": "semrush", "DotBot": "dotbot",