Atraso de requisições para bots usando Nginx

14 min

language: ja bn en es hi pt ru zh-cn zh-tw

Olá, sou o incompetente.

Foi quando, por acaso, verifiquei as requisições do site que eu mesmo hospedo.

Requisições absurdas de bots

Basicamente, eu não verificava isso com frequência, pois configurei o fail2ban para bloquear ataques DoS HTTP no nível de IP, de modo que os IPs detectados são descartados (drop).

Como o rhit não permite ver o User-Agent (UA) em si, fiquei curioso e fiz um fork dele para que, no branch dev, os acessos possam ser agregados por UA.

https://github.com/haturatu/rhit/tree/dev

Os 50 user agents mais frequentes são os seguintes:

# rhit -f ua -l 5
I've read 3 files in "/var/log/nginx"
1,848,993 hits and 11G from 2026/03/28 to 2026/04/02
2,025 user agents. 100 most frequent:
┌───┬───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬───────┬─────┬─────┬─────┐
│ # │user agent                                                                                                                             │ hits  │bytes│days │trend│
├───┼───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┼───────┼─────┼─────┼─────┤
│  1│meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)                                              │486,092│ 4.5G│▇ ▄▆ │     │
│  2│Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0          │143,018│ 663M│▄ ▇▆ │     │
│  3│Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0          │142,880│ 665M│▄ ▇▆ │     │
│  4│Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36                        │142,562│ 661M│▄ ▇▆ │     │
│  5│Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36                        │142,454│ 648M│▄ ▇▆ │     │
│  6│Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36                  │142,284│ 636M│▄ ▇▆ │     │
│  7│Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36                        │142,279│ 643M│▄ ▇▆ │     │
│  8│Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36                        │141,777│ 669M│▄ ▇▆ │     │
│  9│Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36                  │141,209│ 652M│▄ ▇▆ │     │
│ 10│Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)         │ 48,242│ 211M│▆ ▅▇ │     │
│   │Chrome/119.0.6045.214 Safari/537.36                                                                                                    │       │     │     │     │
│ 11│Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15 (Applebot/0.1;   │ 47,859│ 252M│▂ ▄▇ │  ➚  │
│   │+http://www.apple.com/go/applebot)                                                                                                     │       │     │     │     │
│ 12│Enjoy Relay 0.3.1                                                                                                                      │ 45,217│ 236M│▆ ▇▇ │     │
│ 13│selective-relay/0.1.0 (https://hashtag-relay.dtp-mstdn.jp)                                                                             │ 13,577│  71M│▇    │➘ ➘ ➘│
│ 14│Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)                                                                          │  6,575│  14M│▇ ▁▂ │  ➘  │
│ 15│curl/8.7.1                                                                                                                             │  4,062│ 7.4M│▇  ▇ │  ➚  │
│ 16│pub-relay-prototype                                                                                                                    │  3,389│  18M│▆ ▇▇ │     │
│ 17│Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.3; +https://openai.com/gptbot)                                 │  3,113│ 8.2M│▇ ▇▇ │     │
│ 18│Mozilla/5.0 (Android 16; Mobile; rv:132.0) Gecko/132.0 Firefox/132.0                                                                   │  2,786│  15M│▆ ▃▇ │     │
│ 19│Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)                                                                     │  2,240│  15M│▇  ▃ │     │
│ 20│Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Mobile Safari/537.36                        │  1,715│  23M│▁ ▂▇ │ ➚ ➚ │
│ 21│Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible;                             │  1,545│  11M│▇ ▃▅ │     │
│   │PetalBot;+https://webmaster.petalsearch.com/site/petalbot)                                                                             │       │     │     │     │
│ 22│Mozilla/5.0 (compatible; SERankingBacklinksBot/1.0; +https://seranking.com/backlinks-crawler)                                          │  1,426│ 6.7M│▃ ▇  │➘ ➘ ➘│
│ 23│facebookexternalua                                                                                                                     │  1,385│ 7.2M│▇ ▂▅ │     │
│ 24│Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.7680.164 Mobile          │  1,382│ 5.5M│   ▇ │➚ ➚ ➚│
│   │Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)                                                             │       │     │     │     │
│ 25│Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; Bytespider;                  │  1,356│ 6.6M│▅ ▄▇ │     │
│   │spider-feedback@bytedance.com)                                                                                                         │       │     │     │     │
│ 26│Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)                                │  1,061│ 2.2M│▃ ▇▄ │     │
│ 27│Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36                        │    930│ 3.6M│▆ ▅▇ │     │
│ 28│Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; TikTokSpider;                │    925│ 2.1M│▆ ▅▇ │     │
│   │ttspider-feedback@tiktok.com)                                                                                                          │       │     │     │     │
│ 29│Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3                     │    880│ 1.0M│  ▇  │➘ ➘ ➘│
│ 30│Mozilla/5.0 (compatible; DotBot/1.2; +https://opensiteexplorer.org/dotbot; help@moz.com)                                               │    645│ 9.4M│▇    │ ➘ ➘ │
│ 31│Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36                        │    637│  14M│▇    │➘ ➘ ➘│
│ 32│Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.7680.153 Mobile          │    616│ 2.7M│▇ ▇  │➘ ➘ ➘│
│   │Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)                                                             │       │     │     │     │
│ 33│Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148         │    527│ 3.2M│▆ ▇▇ │     │
│   │Safari/604.1                                                                                                                           │       │     │     │     │
│ 34│Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36                  │    449│ 4.8M│▄ ▇▆ │     │
│ 35│Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/116.0.1938.76     │    441│ 5.8M│▅ ▆▇ │     │
│   │Safari/537.36                                                                                                                          │       │     │     │     │
│ 36│organisera.org - Mobilizon 5.1.0                                                                                                       │    440│ 2.3M│▇ ▆▇ │     │
│ 37│python-requests/2.32.5                                                                                                                 │    390│ 227K│▇ ▄▇ │     │
│ 38│Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36                        │    247│ 527K│  ▇  │➘ ➘ ➘│
│ 39│Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36; compatible;     │    242│ 189K│▇ ▃▂ │ ➘ ➘ │
│   │OAI-SearchBot/1.3; robots.txt; +https://openai.com/searchbot                                                                           │       │     │     │     │
│ 40│Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36                        │    241│ 1.9M│▆ ▇▅ │     │
│ 41│Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36                    │    231│ 2.2M│▇ ▅▃ │  ➘  │
│ 42│Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36                  │    215│ 2.1M│▇ ▇▅ │     │
│ 43│Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36                  │    206│ 1.7M│▇ ▇▆ │     │
│ 44│Mozilla/5.0 (compatible; SemrushBot/7~bl; +http://www.semrush.com/bot.html)                                                            │    192│ 1.5M│▆ ▄▇ │     │
│ 45│Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.47  │    173│ 1.4M│▆ ▇▆ │     │
│ 46│Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36                  │    171│ 1.4M│▄ ▇▆ │     │
│ 47│Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36                  │    168│ 1.2M│▇ ▆▄ │  ➘  │
│ 48│Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36, Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)   │    167│ 353K│▇    │➘ ➘ ➘│
│   │Chrome/120.0.0.0 Safari/537.36                                                                                                         │       │     │     │     │
│ 49│Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)                                                               │    166│ 1.4M│▅ ▃▇ │  ➚  │
│ 50│Mozilla/5.0 (compatible; crawler)                                                                                                      │    161│ 750K│  ▅▇ │  ➚  │

Caramba, a Meta está vindo demais...!

Bem, como ela não está sendo pega pelas regras contra DoS, provavelmente faz as requisições em um ritmo relativamente lento e razoável, mas ainda assim isso me preocupa. Dito isso, não quero bloquear os bots completamente e quero que o site continue funcionando como parte de uma WWW razoavelmente livre, então decidi implementar um limite de taxa.

Nginx

Como quero aplicar a quase tudo, ficou assim:

diff --git a/nginx.conf b/nginx.conf
index 10803c3..42f9390 100644
--- a/nginx.conf
+++ b/nginx.conf
@@ -15,6 +15,44 @@ http {
     server_tokens off;
     default_type application/octet-stream;
 
+    # Apenas bots e crawlers de pré-visualização de links são alvo do limite de taxa
+    map $http_user_agent $is_bot {
+        default 0;
+        ~*bot 1;
+        ~*crawler 1;
+        ~*spider 1;
+        ~*facebookexternalhit 1;
+        ~*slackbot 1;
+        ~*discordbot 1;
+        ~*twitterbot 1;
+        ~*linkedinbot 1;
+        ~*embedly 1;
+        ~*quora 1;
+        ~*skypeuripreview 1;
+        ~*whatsapp 1;
+        ~*telegrambot 1;
+        ~*applebot 1;
+        ~*pingdom 1;
+        ~*uptimerobot 1;
+    }
+
+    # stg.api.1btc.love é para fins de validação, então bots não são alvo de limite de taxa
+    # Se a chave for uma string vazia, não será contada por limit_req_zone
+    map $server_name $bot_limit_host_key {
+        stg.api.1btc.love "";
+        default $binary_remote_addr;
+    }
+
+    # Use a chave por IP apenas para bots, e uma string vazia para usuários humanos para não limitar
+    map $is_bot $bot_limit_key {
+        0 "";
+        1 $bot_limit_host_key;
+    }
+
+    limit_req_zone $bot_limit_key zone=bot:10m rate=1r/s;
+    limit_req_status 429;
+    limit_req zone=bot burst=5 nodelay;
+
     sendfile on;
     #Enables or disables buffering of responses from the proxied server.
     proxy_buffering on;

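Ao aplicar isso no bloco http de nível superior, também consegui definir regras de exclusão. Com isso, cada IP identificado como bot fica limitado a 1 requisição por segundo; por causa de `burst=5 nodelay`, rajadas de até 5 requisições adicionais ainda são atendidas imediatamente, e o que exceder isso recebe o status 429.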

Fim.

Related Posts