Somewhat production-ready now. It's still a bit slow: with 14 clients hammering the server, CPU usage reaches 80% and the memory footprint is 16 MB.

This commit is contained in:
Rasmus Rasmussen 2025-05-16 21:31:04 +02:00
parent 18581ed9ac
commit 14279b5c06
13 changed files with 279 additions and 72 deletions

View File

@ -153,6 +153,30 @@ src/FileUtils.cpp.s:
$(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/FileUtils.cpp.s
.PHONY : src/FileUtils.cpp.s
src/MetricsExporter.o: src/MetricsExporter.cpp.o
.PHONY : src/MetricsExporter.o
# target to build an object file
src/MetricsExporter.cpp.o:
$(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/MetricsExporter.cpp.o
.PHONY : src/MetricsExporter.cpp.o
src/MetricsExporter.i: src/MetricsExporter.cpp.i
.PHONY : src/MetricsExporter.i
# target to preprocess a source file
src/MetricsExporter.cpp.i:
$(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/MetricsExporter.cpp.i
.PHONY : src/MetricsExporter.cpp.i
src/MetricsExporter.s: src/MetricsExporter.cpp.s
.PHONY : src/MetricsExporter.s
# target to generate assembly for a file
src/MetricsExporter.cpp.s:
$(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/MetricsExporter.cpp.s
.PHONY : src/MetricsExporter.cpp.s
src/ServerUtils.o: src/ServerUtils.cpp.o
.PHONY : src/ServerUtils.o
@ -261,6 +285,9 @@ help:
@echo "... src/FileUtils.o"
@echo "... src/FileUtils.i"
@echo "... src/FileUtils.s"
@echo "... src/MetricsExporter.o"
@echo "... src/MetricsExporter.i"
@echo "... src/MetricsExporter.s"
@echo "... src/ServerUtils.o"
@echo "... src/ServerUtils.i"
@echo "... src/ServerUtils.s"

View File

@ -10,9 +10,9 @@ using namespace std;
template <typename T>
class ConcurrentQueue {
private:
std::queue<T> queue_;
mutable std::mutex mutex_;
std::condition_variable condition_;
queue<T> queue_;
mutable mutex mutex_;
condition_variable condition_;
public:
// Apparently, if you have a mutex in a class, you can't copy or assign the class to any other class.
@ -49,6 +49,11 @@ public:
lock_guard<mutex> lock(mutex_);
return queue_.empty();
}
int size() {
lock_guard<mutex> lock(mutex_);
return queue_.size();
}
};
#endif

13
include/Crawler.h Normal file
View File

@ -0,0 +1,13 @@
#ifndef CRAWLER_H
#define CRAWLER_H
#include <string>
using namespace std;
// One tracked client: the User-Agent it last presented and how many
// requests ("links pressed") have been counted for it.
struct Crawler {
    // User-Agent string most recently reported by this client.
    std::string user_agent;
    // Request counter. Explicitly zero-initialised so that a
    // default-constructed Crawler never carries an indeterminate value.
    int links_pressed{0};
};
#endif

View File

@ -4,7 +4,9 @@
enum data_type {
HTML,
CSS,
IMAGE
IMAGE,
TEXT,
IP
};
enum image_type {

34
include/MetricsExporter.h Normal file
View File

@ -0,0 +1,34 @@
//
// Created by skingging on 5/16/25.
//
#ifndef METRICSEXPORTER_H
#define METRICSEXPORTER_H
#include <memory>
#include <unordered_map>
#include "../include/Crawler.h"
using namespace std;
struct MetricsExporter {
static void serve(shared_ptr<unordered_map<uint32_t, Crawler>> crawler);
static void process_request(int client_fd);
static void send_data(int client_fd, const string& data);
};
// Status line and header fields that precede the chunked metrics body.
// The Cache-control field is intentionally left disabled.
const std::string HTML_RESPONSE_HEADER_lol{
    "HTTP/1.1 200 OK\r\n"
    "Content-Type: text/html; charset=utf-8\r\n"
    "Transfer-Encoding: chunked\r\n"
    //"Cache-control: max-age=12000\r\n"
    "Connection: close\r\n\r\n"};
// Opening markup of the metrics page, up to and including <body>.
const std::string BEGINNING{
    "<!DOCTYPE html>\n<html><head>\r\n"
    "<title>Drip</title>\r\n"
    "</head><body>\r\n"};
// Closing markup of the metrics page.
const std::string END{"</body></html>\r\n"};
#endif

View File

@ -11,7 +11,7 @@ using namespace std;
class ServerUtils {
public:
static void serve(shared_ptr<ConcurrentQueue<Track>> t_test);
static void serve(shared_ptr<ConcurrentQueue<Track>> cq_track);
private:
static void process_request(int client_fd);
static void send_header(int client_fd, data_type type);
@ -27,7 +27,7 @@ const string HTML_RESPONSE_HEADER =
"HTTP/1.1 200 OK\r\n"
"Content-Type: text/html; charset=utf-8\r\n"
"Transfer-Encoding: chunked\r\n"
"Cache-control: max-age=12000\r\n"
//"Cache-control: max-age=12000\r\n"
"Connection: close\r\n\r\n";
const string CSS_RESPONSE_HEADER =

View File

@ -4,13 +4,14 @@
#include <unordered_map>
#include "../include/ConcurrentQueue.h"
#include "../include/Crawler.h"
#include "../include/Track.h"
using namespace std;
struct TrackerUtils {
static void track(const shared_ptr<ConcurrentQueue<Track>>& t_test);
static void print(unordered_map<string, int> tracks);
static void track(const shared_ptr<ConcurrentQueue<Track>>& s_track, const shared_ptr<unordered_map<uint32_t, Crawler>>& crawler);
static void print();
};
#endif

View File

@ -15,6 +15,7 @@ struct WordUtils {
static string load_file(const string& path);
static vector<string> split_string(const string& input, data_type type);
static string extract_url(const string& input);
static string extract_user_agent(const string& input);
static bool contains_image(const string& input);
static string extract_image_name(const string& input);
static string create_tag(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash);

72
src/MetricsExporter.cpp Normal file
View File

@ -0,0 +1,72 @@
#include "../include/MetricsExporter.h"
#include <string>
#include <utility>
#include <vector>
#include <iostream>
#include <sstream>
#include <unistd.h>
#include <netinet/in.h>
#include <thread>
#include <chrono>
#include <cstring>
#include <random>
#include <ranges>
#include <sys/socket.h>
// Crawler table rendered by MetricsExporter::process_request(); it is
// assigned in MetricsExporter::serve() before any connection is accepted.
// NOTE(review): main() hands the same map to TrackerUtils::track(), which
// writes to it from another thread, and no lock is visible in this file —
// confirm whether access needs to be synchronised.
shared_ptr<unordered_map<uint32_t, Crawler>> crawler_;
/**
 * Runs the metrics HTTP listener on TCP port 8889 (all interfaces).
 *
 * Stores `crawler` in the file-level crawler_ pointer, then accepts
 * connections forever and serves each one on its own detached thread via
 * process_request().
 *
 * The function returns only when the listening socket cannot be set up
 * (socket/bind/listen failure). For that reason it is not marked
 * [[noreturn]]; the attribute was also placed on the return type, where it
 * cannot apply.
 *
 * @param crawler Shared crawler table to publish.
 */
void MetricsExporter::serve(shared_ptr<unordered_map<uint32_t, Crawler>> crawler) {
    crawler_ = std::move(crawler);

    // server_fd is a file descriptor.
    const int server_fd = socket(AF_INET, SOCK_STREAM, 0);
    if (server_fd == -1) {
        cerr << "MetricsExporter: could not create the listening socket.\n";
        return;
    }

    sockaddr_in addr{};
    addr.sin_family = AF_INET;
    addr.sin_port = htons(8889);
    addr.sin_addr.s_addr = INADDR_ANY;

    if (bind(server_fd, reinterpret_cast<sockaddr *>(&addr), sizeof(addr)) != 0) {
        cout << "Please wait for the pipe to close.";
        close(server_fd);  // do not leak the descriptor on the error path
        return;
    }

    if (listen(server_fd, 50) != 0) {
        cerr << "MetricsExporter: listen() failed.\n";
        close(server_fd);
        return;
    }

    cout << "Server is running on http://localhost:8889 \n";

    // The loop never exits, so there is no close(server_fd) after it.
    while (true) {
        const int client_fd = accept(server_fd, nullptr, nullptr);
        // A failed accept must not spawn a worker on an invalid descriptor.
        if (client_fd == -1) continue;

        thread(process_request, client_fd).detach();
    }
}
void MetricsExporter::process_request(const int client_fd) {
send_data(client_fd, HTML_RESPONSE_HEADER_lol);
ostringstream oss;
oss << BEGINNING;
for (auto it = crawler_->begin(); it != crawler_->end(); ++it) {
oss << it->second.links_pressed << ": " << it->second.user_agent << endl;
}
oss << END;
send_data(client_fd, oss.str());
send(client_fd, "0\r\n\r\n", 5, 0);
close(client_fd);
}
void MetricsExporter::send_data(const int client_fd, const string& data) {
ostringstream oss;
oss << hex << data.size() << "\r\n" << data << "\r\n";
send(client_fd, oss.str().c_str(), oss.str().size(), 0);
}

View File

@ -21,10 +21,10 @@
vector<string> css;
vector<string> images;
vector<unordered_map<string, unordered_map<string, int>>> all_lists;
shared_ptr<ConcurrentQueue<Track>> t_test;
shared_ptr<ConcurrentQueue<Track>> tracks;
void [[noreturn]] ServerUtils::serve(shared_ptr<ConcurrentQueue<Track>> test) {
t_test = std::move(test);
void [[noreturn]] ServerUtils::serve(shared_ptr<ConcurrentQueue<Track>> cq_track) {
tracks = std::move(cq_track);
css = WordUtils::load_css("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/style.css");
images = FileUtils::get_image_list("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/");
const vector<string> words = FileUtils::get_wordlists("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/wordlist/");
@ -53,6 +53,8 @@ void [[noreturn]] ServerUtils::serve(shared_ptr<ConcurrentQueue<Track>> test) {
while (true) {
int client_fd = accept(server_fd, nullptr, nullptr);
if (client_fd == -1) continue;
//cerr << client_fd << endl;
thread(process_request, client_fd).detach();
}
@ -60,24 +62,35 @@ void [[noreturn]] ServerUtils::serve(shared_ptr<ConcurrentQueue<Track>> test) {
}
void ServerUtils::process_request(const int client_fd) {
string ip = get_ip(client_fd);
char buffer[1024];
const unsigned long bytes_received = recv(client_fd, buffer, sizeof(buffer) - 1, 0);
string url;
string user_agent;
if (bytes_received > 0) {
buffer[bytes_received] = '\0';
url = WordUtils::extract_url(string(buffer));
user_agent = WordUtils::extract_user_agent(string(buffer));
if (url.empty()) {
close(client_fd);
cerr << "AAAA \n";
}
} else {
close(client_fd);
cerr << "AAAA \n";
}
Track track;
track.Ip = ip;
track.UserAgent = user_agent;
tracks->push(track);
if (url == "/style.css") {
// This sends the header, that instructs how the browser should interpret the data.
send_header(client_fd, data_type::CSS);
@ -95,13 +108,6 @@ void ServerUtils::process_request(const int client_fd) {
}
else {
Track track;
track.Ip = get_ip(client_fd);
// Testing only.
track.UserAgent = url;
t_test->push(track);
const unsigned long hash3 = WordUtils::fnv1aHash(url);
send_header(client_fd, HTML);
@ -137,11 +143,13 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) {
minstd_rand generator(hash);
uniform_int_distribution<unsigned short> distribution_1(0, end - 1);
uniform_int_distribution<unsigned short> distribution_2(0, images.size() - 1);
uniform_int_distribution<unsigned short> distribution_3(0, 8);
uniform_int_distribution<unsigned short> distribution_3(0, 8); // There is only 9 wordlists.
uniform_int_distribution<unsigned short> distribution_4(0, end - 2);
const int link = distribution_1(generator);
const int image = distribution_2(generator);
const int l = distribution_3(generator);
const int img = distribution_4(generator);
while (itr < end) {
send_data(client_fd, WordUtils::create_tag(all_lists[l], hashes[itr]));
@ -150,11 +158,11 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) {
send_data(client_fd, WordUtils::create_link(all_lists[l], hash));
}
if (itr == link) {
if (itr == img) {
send_data(client_fd, WordUtils::create_image(images[image]));
}
this_thread::sleep_for(chrono::milliseconds(25));
//this_thread::sleep_for(chrono::milliseconds(75));
itr++;
}
@ -165,8 +173,8 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) {
}
void ServerUtils::send_chunked_css(const int client_fd) {
for (size_t i = 0; i < css.size(); i++) {
send_data(client_fd, css[i]);
for (const auto & cs : css) {
send_data(client_fd, cs);
this_thread::sleep_for(chrono::milliseconds(25));
}
@ -219,7 +227,7 @@ void ServerUtils::send_data(const int client_fd, const string& data) {
string ServerUtils::get_ip(const int client_fd) {
sockaddr_storage addr{};
socklen_t len = sizeof(addr);
char ipstr[INET6_ADDRSTRLEN];
char buf[INET6_ADDRSTRLEN];
if (getpeername(client_fd, reinterpret_cast<sockaddr *>(&addr), &len) == -1) {
perror("getpeername");
@ -228,15 +236,11 @@ string ServerUtils::get_ip(const int client_fd) {
if (addr.ss_family == AF_INET) {
// IPv4
sockaddr_in* s = reinterpret_cast<sockaddr_in *>(&addr);
inet_ntop(AF_INET, &s->sin_addr, ipstr, sizeof(ipstr));
} else if (addr.ss_family == AF_INET6) {
// IPv6
sockaddr_in6* s = reinterpret_cast<sockaddr_in6 *>(&addr);
inet_ntop(AF_INET6, &s->sin6_addr, ipstr, sizeof(ipstr));
const sockaddr_in* s = reinterpret_cast<sockaddr_in *>(&addr);
inet_ntop(AF_INET, &s->sin_addr, buf, sizeof(buf));
} else {
return "Unknown address family";
}
return string(ipstr);
return buf;
}

View File

@ -1,34 +1,53 @@
#include "../include/TrackerUtils.h"
#include "../include/ConcurrentQueue.h"
#include "../include/TrackerUtils.h"
#include "../include/WordUtils.h"
#include "../include/Crawler.h"
#include "../include/Track.h"
#include <iostream>
#include <unordered_map>
#include <iostream>
#include <utility>
#include <thread>
#include <chrono>
#include <utility>
#include <ranges>
#include <cstdint>
void TrackerUtils::track(const shared_ptr<ConcurrentQueue<Track>> &t_test) {
bool running = true;
unordered_map<string, int> urls;
shared_ptr<unordered_map<uint32_t, Crawler>> urls;
void [[noreturn]] TrackerUtils::track(const shared_ptr<ConcurrentQueue<Track>>& s_track, const shared_ptr<unordered_map<uint32_t, Crawler>>& crawler) {
urls = crawler;
constexpr bool running = true;
while (running) {
auto [Ip, UserAgent] = t_test->wait_and_pop();
Track track;
urls[UserAgent]++;
system("clear");
print(urls);
if (Ip == "STOP") {
running = false;
if (!s_track->try_pop(track)) {
this_thread::sleep_for(chrono::milliseconds(100));
continue;
}
vector<string> temp = WordUtils::split_string(track.Ip, IP);
const int ip1 = stoi(temp[0]);
const int ip2 = stoi(temp[1]);
const int ip3 = stoi(temp[2]);
const int ip4 = stoi(temp[3]);
const uint32_t ip = static_cast<uint32_t>(ip1 << 24) | static_cast<uint32_t>(ip2 << 16) | static_cast<uint32_t>(ip3 << 8) | static_cast<uint32_t>(ip4);
(*urls)[ip].links_pressed++;
(*urls)[ip].user_agent = track.UserAgent;
}
}
void TrackerUtils::print(unordered_map<string, int> tracks) {
for (auto it = tracks.begin(); it != tracks.end(); ++it) {
cerr << it->first << ": " << it->second << endl;
}
void [[noreturn]] TrackerUtils::print() {
this_thread::sleep_for(chrono::milliseconds(5000));
this_thread::sleep_for(chrono::milliseconds(250));
while (true) {
for (const auto &[user_agent, links_pressed]: *urls | views::values) {
cerr << links_pressed << ": " << user_agent << endl;
}
this_thread::sleep_for(chrono::milliseconds(1000));
system("clear"); // Testing only
}
}

View File

@ -54,7 +54,7 @@ vector<string> WordUtils::predict_next_word(const string& input, const unordered
string WordUtils::create_tag(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash) {
constexpr unsigned char predict_num = 5;
const string start_words[3] = {"the", "but", "with"};
const string start_words[] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"};
vector<string> tags;
@ -62,15 +62,17 @@ string WordUtils::create_tag(const unordered_map<string, unordered_map<string, i
uniform_int_distribution<unsigned short> outer_distribution(0, start_words->length() - 1);
const string lol = start_words[outer_distribution(generator)];
vector<string> temp_words = predict_next_word(start_words[outer_distribution(generator)], word_frequencies, predict_num);
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
tags.push_back(lol);
tags.push_back(temp_words[outer_2_distribution(generator)]);
// Words inside the <p> tag
for (unsigned short j = 0; j < 25; j++)
{
for (unsigned short j = 0; j < 25; j++) {
temp_words = predict_next_word(tags[j], word_frequencies, predict_num);
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
@ -80,8 +82,8 @@ string WordUtils::create_tag(const unordered_map<string, unordered_map<string, i
string temp_string = "<p>";
for (size_t l = 0; l < tags.size(); l++) {
temp_string += tags[l];
for (const auto & tag : tags) {
temp_string += tag;
temp_string += " ";
}
@ -91,25 +93,26 @@ string WordUtils::create_tag(const unordered_map<string, unordered_map<string, i
}
string WordUtils::create_link(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const unsigned long hash) {
constexpr unsigned char predict_num = 5;
constexpr unsigned char predict_num = 10;
const string start_words[10] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"};
const string start_words[9] = {"the", "but", "with", "all", "over", "and", "as", "a", "to"};
vector<string> tags;
minstd_rand generator(hash);
uniform_int_distribution<unsigned short> outer_distribution(0, start_words->length() - 1);
uniform_int_distribution<unsigned short> outer_distribution(0, start_words->size() - 1);
vector<string> temp_words = predict_next_word(start_words[outer_distribution(generator)], word_frequencies, predict_num);
const string lol = start_words[outer_distribution(generator)];
vector<string> temp_words = predict_next_word(lol, word_frequencies, predict_num);
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
tags.push_back(lol);
tags.push_back(temp_words[outer_2_distribution(generator)]);
// Words inside the <p> tag
for (unsigned short j = 0; j < 7; j++)
{
for (unsigned short j = 0; j < 7; j++) {
temp_words = predict_next_word(tags[j], word_frequencies, predict_num);
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
@ -120,8 +123,8 @@ string WordUtils::create_link(const unordered_map<string, unordered_map<string,
string temp_string_2 = "<a href=\"";
string temp_string;
for (size_t l = 0; l < tags.size(); l++) {
temp_string += tags[l];
for (const auto & tag : tags) {
temp_string += tag;
}
temp_string_2 += temp_string;
@ -136,7 +139,6 @@ string WordUtils::create_link(const unordered_map<string, unordered_map<string,
}
string WordUtils::create_image(const string& image) {
string temp_1 = R"(<img loading="lazy" src="/)";
temp_1 += image;
temp_1 += ".avif";
@ -144,23 +146,29 @@ string WordUtils::create_image(const string& image) {
return temp_1;
}
vector<string> WordUtils::split_string(const string& input, data_type type) {
vector<string> WordUtils::split_string(const string& input, const data_type type) {
vector<string> data;
size_t start = 0;
size_t end = 0;
// Create a string from the delimiters array
string delimiter_string;
if (type == data_type::HTML) {
if (type == HTML) {
delimiter_string = " .,!?;:()\n\r\t";
}
else if (type == data_type::CSS) {
else if (type == CSS) {
delimiter_string = "}\n\r\t";
}
else if (type == TEXT) {
delimiter_string = "\n";
}
else if (type == IP) {
delimiter_string = ".";
}
while ((end = input.find_first_of(delimiter_string, start)) != string::npos) {
if (end > start) {
@ -209,8 +217,22 @@ string WordUtils::extract_url(const string& input) {
return first_line.substr(method_end + 1, path_end - method_end - 1);
}
/**
 * Extracts the value of the User-Agent header from a raw HTTP request.
 *
 * The request is scanned line by line up to the blank line that ends the
 * header section. The header name is matched case-insensitively wherever it
 * appears, and the value is returned without surrounding spaces/tabs and
 * without the trailing '\r'.
 *
 * The function never indexes past the input and never throws on short or
 * malformed requests, which matters because it runs on detached worker
 * threads where an escaping exception would terminate the process.
 *
 * @param input Raw request text as received from the client.
 * @return The User-Agent value, or "" when the header is absent.
 */
string WordUtils::extract_user_agent(const string& input) {
    constexpr char header_name[] = "user-agent:";
    constexpr size_t header_len = sizeof(header_name) - 1;

    // ASCII-only lowering; header names are ASCII tokens.
    const auto to_lower = [](const char c) {
        return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
    };
    const auto is_blank = [](const char c) { return c == ' ' || c == '\t'; };

    size_t line_start = 0;
    while (line_start < input.size()) {
        size_t line_end = input.find('\n', line_start);
        if (line_end == string::npos) line_end = input.size();

        // Exclude the '\r' of a CRLF line ending from the line's content.
        size_t content_end = line_end;
        if (content_end > line_start && input[content_end - 1] == '\r') --content_end;

        // A blank line ends the header section; the body is not inspected.
        if (content_end == line_start) break;

        if (content_end - line_start >= header_len) {
            bool matches = true;
            for (size_t i = 0; i < header_len; ++i) {
                if (to_lower(input[line_start + i]) != header_name[i]) {
                    matches = false;
                    break;
                }
            }

            if (matches) {
                size_t value_start = line_start + header_len;
                while (value_start < content_end && is_blank(input[value_start])) ++value_start;
                size_t value_end = content_end;
                while (value_end > value_start && is_blank(input[value_end - 1])) --value_end;
                return input.substr(value_start, value_end - value_start);
            }
        }

        line_start = line_end + 1;
    }

    return "";
}
string WordUtils::extract_image_name(const string& input) {
const unsigned short first_line_end = input.find('f');
if (first_line_end == string::npos) return "";
string first_line = input.substr(1, first_line_end);
@ -244,6 +266,7 @@ unsigned int WordUtils::hash_url(const string& input) {
unsigned long WordUtils::djb2Hash(const string& str) {
unsigned long hash = 5381;
for (const char c : str) {
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
}

View File

@ -7,6 +7,7 @@
#include "../include/ConcurrentQueue.h"
#include "../include/TrackerUtils.h"
#include "../include/Track.h"
#include "../include/MetricsExporter.h"
using namespace std;
@ -23,9 +24,14 @@ int main(int argc, const char* argv[]) {
return 0;
}*/
auto queue = std::make_shared<ConcurrentQueue<Track>>();
cout << "lol" << endl;
thread(TrackerUtils::track, queue).detach();
auto queue = std::make_shared<ConcurrentQueue<Track>>();
auto metrics = std::make_shared<unordered_map<uint32_t, Crawler>>();
thread(TrackerUtils::track, queue, metrics).detach();
thread(MetricsExporter::serve, metrics).detach();
//argv[1]
signal(SIGPIPE, SIG_IGN);