There shouldn't be any memory leaks anymore, and it's now super fast.

This commit is contained in:
Rasmus Rasmussen 2025-05-17 18:49:22 +02:00
parent 14279b5c06
commit 4cec4644fa
7 changed files with 169 additions and 92 deletions

View File

@ -20,14 +20,14 @@ class ServerUtils {
static void send_data(int client_fd, const string& data);
static void send_image(int client_fd, const string& path, image_type type);
static size_t send_all(int client_fd, const char* data, size_t length);
static string get_ip(int client_fd);
static uint32_t get_ip_2(int client_fd);
};
const string HTML_RESPONSE_HEADER =
"HTTP/1.1 200 OK\r\n"
"Content-Type: text/html; charset=utf-8\r\n"
"Transfer-Encoding: chunked\r\n"
//"Cache-control: max-age=12000\r\n"
"Cache-control: max-age=12000\r\n"
"Connection: close\r\n\r\n";
const string CSS_RESPONSE_HEADER =

View File

@ -5,7 +5,7 @@
using namespace std;
struct Track {
string Ip;
uint32_t Ip;
string UserAgent;
};

View File

@ -12,6 +12,7 @@ struct WordUtils {
static unordered_map<string, unordered_map<string, int>> load_data(const string& path);
static vector<string> load_css(const string& path);
static vector<string> predict_next_word(const string& input, const unordered_map<string, unordered_map<string, int>>& word_frequencies, size_t count);
static vector<string> predict_next_word_2(const string& input, const unordered_map<string, unordered_map<string, int>>& word_frequencies, size_t count);
static string load_file(const string& path);
static vector<string> split_string(const string& input, data_type type);
static string extract_url(const string& input);
@ -19,6 +20,7 @@ struct WordUtils {
static bool contains_image(const string& input);
static string extract_image_name(const string& input);
static string create_tag(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash);
static string create_tag_2(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash);
static string create_link(const unordered_map<string, unordered_map<string, int>>& word_frequencies, unsigned long hash);
static string create_image(const string& image);
static unsigned int hash_url(const string& input);

View File

@ -9,13 +9,13 @@ bool FileUtils::fileExists(const char *path){
}
vector<unsigned char> FileUtils::open_image(const string& path) {
std::ifstream file(path, std::ios::binary);
ifstream file(path, ios::binary);
if (!file) {
std::cerr << "Error opening file: " << path << std::endl;
cerr << "Error opening file: " << path << endl;
return {}; // Return an empty vector on error
}
std::vector<unsigned char> data;
vector<unsigned char> data;
char c;
while (file.get(c)) {
@ -28,7 +28,7 @@ vector<unsigned char> FileUtils::open_image(const string& path) {
vector<string> FileUtils::get_image_list(const string& path) {
vector<string> images;
const std::string ext(".avif");
const string ext(".avif");
for (auto &p : filesystem::recursive_directory_iterator(path))
{
if (p.path().extension() == ext) {
@ -41,7 +41,7 @@ vector<string> FileUtils::get_image_list(const string& path) {
vector<string> FileUtils::get_wordlists(const string& path) {
vector<string> images;
const std::string ext(".txt");
const string ext(".txt");
for (auto &p : filesystem::recursive_directory_iterator(path))
{
if (p.path().extension() == ext) {

View File

@ -16,6 +16,11 @@
#include <random>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <cstdint>
#include <iomanip>
vector<string> css;
@ -62,7 +67,7 @@ void [[noreturn]] ServerUtils::serve(shared_ptr<ConcurrentQueue<Track>> cq_track
}
void ServerUtils::process_request(const int client_fd) {
string ip = get_ip(client_fd);
const uint32_t ip = get_ip_2(client_fd);
char buffer[1024];
const unsigned long bytes_received = recv(client_fd, buffer, sizeof(buffer) - 1, 0);
@ -93,18 +98,18 @@ void ServerUtils::process_request(const int client_fd) {
if (url == "/style.css") {
// This sends the header, that instructs how the browser should interpret the data.
send_header(client_fd, data_type::CSS);
send_header(client_fd, CSS);
send_chunked_css(client_fd);
}
else if (url == "/favicon.png") {
send_image(client_fd, "/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/a.png", image_type::PNG);
send_image(client_fd, "/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/a.png", PNG);
}
else if (WordUtils::contains_image(url)) {
string p = "/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/images/";
p += WordUtils::extract_image_name(url);
send_image(client_fd, p, image_type::AVIF);
send_image(client_fd, p, AVIF);
}
else {
@ -121,11 +126,11 @@ void ServerUtils::process_request(const int client_fd) {
}
void ServerUtils::send_header(const int client_fd, const data_type type) {
if (type == HTML) {
if (type == HTML) [[likely]] {
send_data(client_fd, HTML_RESPONSE_HEADER);
}
else if (type == CSS) {
else if (type == CSS) [[unlikely]] {
send_data(client_fd, CSS_RESPONSE_HEADER);
}
}
@ -142,17 +147,17 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) {
minstd_rand generator(hash);
uniform_int_distribution<unsigned short> distribution_1(0, end - 1);
uniform_int_distribution<unsigned short> distribution_2(0, images.size() - 1);
uniform_int_distribution<unsigned short> distribution_3(0, 8); // There is only 9 wordlists.
uniform_int_distribution<unsigned short> distribution_4(0, end - 2);
const int link = distribution_1(generator);
const int image = distribution_2(generator);
const int l = distribution_3(generator);
const int img = distribution_4(generator);
distribution_1.param(uniform_int_distribution<unsigned short>::param_type(0, images.size() - 1));
const int image = distribution_1(generator);
distribution_1.param(uniform_int_distribution<unsigned short>::param_type(0, 8));
const int l = distribution_1(generator);
distribution_1.param(uniform_int_distribution<unsigned short>::param_type(0, end - 2));
const int img = distribution_1(generator);
while (itr < end) {
send_data(client_fd, WordUtils::create_tag(all_lists[l], hashes[itr]));
send_data(client_fd, WordUtils::create_tag_2(all_lists[l], hashes[itr]));
if (itr == link) {
send_data(client_fd, WordUtils::create_link(all_lists[l], hash));
@ -176,29 +181,32 @@ void ServerUtils::send_chunked_css(const int client_fd) {
for (const auto & cs : css) {
send_data(client_fd, cs);
this_thread::sleep_for(chrono::milliseconds(25));
//this_thread::sleep_for(chrono::milliseconds(25));
}
}
void ServerUtils::send_image(const int client_fd, const string& path, const image_type type) {
const vector<unsigned char> image = FileUtils::open_image(path);
std::ostringstream oss;
oss << "HTTP/1.1 200 OK\r\n";
string s = "HTTP/1.1 200 OK\r\n";
if (type == PNG) {
oss << "Content-Type: image/png\r\n";
s += "Content-Type: image/png\r\n";
}
if (type == AVIF) {
oss << "Content-Type: image/avif\r\n";
s += "Content-Type: image/avif\r\n";
}
oss << "Content-Length: " << image.size() << "\r\n";
oss << "\r\n";
s += "Cache-control: max-age=12000\r\n";
s += "Content-Length: ";
s += to_string(image.size());
s += "\r\n";
s += "\r\n";
send(client_fd, oss.str().c_str(), oss.str().size(), 0);
send(client_fd, s.c_str(), s.size(), 0);
send(client_fd, reinterpret_cast<const char*>(image.data()), image.size(), 0);
send(client_fd, image.data(), image.size(), 0);
//send_all(client_fd,reinterpret_cast<const char*>(image.data()), image.size());
}
@ -217,30 +225,29 @@ size_t ServerUtils::send_all(const int client_fd, const char *data, const size_t
}
void ServerUtils::send_data(const int client_fd, const string& data) {
ostringstream oss;
char size_hex[20]; // hold 64-bit hex value
snprintf(size_hex, sizeof(size_hex), "%zx", data.size());
oss << hex << data.size() << "\r\n" << data << "\r\n";
string message;
message.reserve(strlen(size_hex) + 4 + data.size());
message += size_hex;
message += "\r\n";
message += data;
message += "\r\n";
send(client_fd, oss.str().c_str(), oss.str().size(), 0);
send(client_fd, message.c_str(), message.size(), 0);
}
string ServerUtils::get_ip(const int client_fd) {
uint32_t ServerUtils::get_ip_2(const int client_fd) {
sockaddr_storage addr{};
socklen_t len = sizeof(addr);
char buf[INET6_ADDRSTRLEN];
if (getpeername(client_fd, reinterpret_cast<sockaddr *>(&addr), &len) == -1) {
perror("getpeername");
return "";
return 0; // Or some other error value. Could throw an exception.
}
if (addr.ss_family == AF_INET) {
// IPv4
const sockaddr_in* s = reinterpret_cast<sockaddr_in *>(&addr);
inet_ntop(AF_INET, &s->sin_addr, buf, sizeof(buf));
} else {
return "Unknown address family";
}
const sockaddr_in* s = reinterpret_cast<sockaddr_in *>(&addr);
return ntohl(s->sin_addr.s_addr); // Convert network byte order to host byte order
return buf;
}

View File

@ -26,16 +26,16 @@ void [[noreturn]] TrackerUtils::track(const shared_ptr<ConcurrentQueue<Track>>&
continue;
}
vector<string> temp = WordUtils::split_string(track.Ip, IP);
/*vector<string> temp = WordUtils::split_string(track.Ip, IP);
const int ip1 = stoi(temp[0]);
const int ip2 = stoi(temp[1]);
const int ip3 = stoi(temp[2]);
const int ip4 = stoi(temp[3]);
const uint32_t ip = static_cast<uint32_t>(ip1 << 24) | static_cast<uint32_t>(ip2 << 16) | static_cast<uint32_t>(ip3 << 8) | static_cast<uint32_t>(ip4);
const uint32_t ip = static_cast<uint32_t>(ip1 << 24) | static_cast<uint32_t>(ip2 << 16) | static_cast<uint32_t>(ip3 << 8) | static_cast<uint32_t>(ip4);*/
(*urls)[ip].links_pressed++;
(*urls)[ip].user_agent = track.UserAgent;
(*urls)[track.Ip].links_pressed++;
(*urls)[track.Ip].user_agent = track.UserAgent;
}
}

View File

@ -6,10 +6,14 @@
#include <cctype>
#include <cstring>
#include <random>
#include <vector>
#include <string>
#include <unordered_map>
#include <algorithm>
#include <iterator>
// We can just use an unordered_map<string, unordered_map<string, int>> instead
unordered_map<string, unordered_map<string, int>> WordUtils::load_data(const string& path) {
vector<string> data = split_string(load_file(path), data_type::HTML);
vector<string> data = split_string(load_file(path), HTML);
unordered_map<string, unordered_map<string, int>> word_frequencies = {};
@ -26,8 +30,7 @@ vector<string> WordUtils::load_css(const string& path) {
return split_string(load_file(path), data_type::CSS);
}
// We can just use an unordered_map<string, unordered_map<string, int>> instead, since we're already sorting before counting.
vector<string> WordUtils::predict_next_word(const string& input, const unordered_map<string, unordered_map<string, int>>& word_frequencies, size_t count) {
vector<string> WordUtils::predict_next_word(const string& input, const unordered_map<string, unordered_map<string, int>>& word_frequencies, const size_t count) {
const auto it = word_frequencies.find(input);
if (it == word_frequencies.end()) return {input};
@ -51,12 +54,42 @@ vector<string> WordUtils::predict_next_word(const string& input, const unordered
return results;
}
// Returns up to `count` successors of `input`, ordered by descending frequency.
//
// input            - word whose successors are looked up.
// word_frequencies - maps a word to its successors and how often each one was counted.
// count            - upper bound on the number of successors returned.
//
// When `input` has no entry in `word_frequencies`, the result is a one-element
// vector containing `input` itself.
vector<string> WordUtils::predict_next_word_2(const string& input, const unordered_map<string, unordered_map<string, int>>& word_frequencies, const size_t count) {
    const auto entry = word_frequencies.find(input);
    if (entry == word_frequencies.end()) {
        return {input};
    }

    const unordered_map<string, int>& followers = entry->second;

    // Scratch buffer holding exactly as many slots as candidates we keep.
    vector<pair<string, int>> ranked(min(count, followers.size()));

    // Higher counts sort first.
    const auto by_count_desc = [](const auto& lhs, const auto& rhs) {
        return lhs.second > rhs.second;
    };

    // Only the best ranked.size() entries are copied and ordered; the rest of
    // the map is never fully sorted.
    const auto ranked_end = partial_sort_copy(
        followers.begin(), followers.end(),
        ranked.begin(), ranked.end(),
        by_count_desc);

    // Strip the counts, keeping just the words in ranked order.
    vector<string> results;
    results.reserve(distance(ranked.begin(), ranked_end));
    transform(ranked.begin(), ranked_end, back_inserter(results),
              [](const pair<string, int>& candidate) { return candidate.first; });

    return results;
}
string WordUtils::create_tag(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash) {
constexpr unsigned char predict_num = 5;
const string start_words[] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"};
vector<string> tags;
string tags_1[27];
minstd_rand generator(hash);
@ -64,25 +97,25 @@ string WordUtils::create_tag(const unordered_map<string, unordered_map<string, i
const string lol = start_words[outer_distribution(generator)];
vector<string> temp_words = predict_next_word(start_words[outer_distribution(generator)], word_frequencies, predict_num);
vector<string> temp_words = predict_next_word_2(lol, word_frequencies, predict_num);
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
tags.push_back(lol);
tags.push_back(temp_words[outer_2_distribution(generator)]);
tags_1[0] = lol;
tags_1[1] = temp_words[outer_2_distribution(generator)];
// Words inside the <p> tag
for (unsigned short j = 0; j < 25; j++) {
temp_words = predict_next_word(tags[j], word_frequencies, predict_num);
temp_words = predict_next_word_2(tags_1[j], word_frequencies, predict_num);
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
tags.push_back(temp_words[inner_distribution(generator)]);
tags_1[j + 2] = temp_words[inner_distribution(generator)];
}
string temp_string = "<p>";
for (const auto & tag : tags) {
for (const auto & tag : tags_1) {
temp_string += tag;
temp_string += " ";
}
@ -92,38 +125,82 @@ string WordUtils::create_tag(const unordered_map<string, unordered_map<string, i
return temp_string;
}
// Builds one HTML paragraph ("<p>word word ... .</p>\n") of 27 words chosen
// with a random generator seeded from `hash`.
//
// word_frequencies - maps a word to its successors and their counts; queried
//                    through predict_next_word_2 for the top `predict_num` successors.
// hash             - seed for the minstd_rand generator that drives every choice.
//
// Returns the paragraph markup. Each word is followed by a single space, and
// the paragraph is closed with ".</p>\n".
string WordUtils::create_tag_2(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash) {
    constexpr unsigned char predict_num = 5;
    constexpr size_t chained_words = 25;
    const string start_words[] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"};

    string tags_1[chained_words + 2];

    minstd_rand generator(hash);
    uniform_int_distribution<size_t> index_dist; // re-used, range set before every draw

    // Draws an index in [0, n). Callers must pass n > 0, otherwise n - 1 wraps around.
    const auto random_index = [&](const size_t n) {
        index_dist.param(uniform_int_distribution<size_t>::param_type(0, n - 1));
        return index_dist(generator);
    };

    // Picks one of the predicted successors of `word`. Falls back to `word`
    // itself when the prediction list is empty, so an empty list can never be indexed.
    const auto random_successor = [&](const string& word) -> string {
        const vector<string> candidates = predict_next_word_2(word, word_frequencies, predict_num);
        if (candidates.empty()) {
            return word;
        }
        return candidates[random_index(candidates.size())];
    };

    // The range is the number of array elements. `start_words->length()` would
    // be the length of the first string ("the"), limiting the choice to three words.
    tags_1[0] = start_words[random_index(size(start_words))];
    tags_1[1] = random_successor(tags_1[0]);

    // Words inside the <p> tag.
    // NOTE(review): word j + 2 is drawn from the successors of word j (not j + 1).
    // This is kept as-is; confirm that it is intended.
    for (size_t j = 0; j < chained_words; ++j) {
        tags_1[j + 2] = random_successor(tags_1[j]);
    }

    string temp_string = "<p>";

    for (const auto& tag : tags_1) {
        temp_string += tag;
        temp_string += " ";
    }

    temp_string += ".</p>\n";

    return temp_string;
}
string WordUtils::create_link(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const unsigned long hash) {
constexpr unsigned char predict_num = 10;
const string start_words[9] = {"the", "but", "with", "all", "over", "and", "as", "a", "to"};
vector<string> tags;
string tags_1[9];
minstd_rand generator(hash);
uniform_int_distribution<unsigned short> outer_distribution(0, start_words->size() - 1);
const string lol = start_words[outer_distribution(generator)];
vector<string> temp_words = predict_next_word(lol, word_frequencies, predict_num);
vector<string> temp_words = predict_next_word_2(lol, word_frequencies, predict_num);
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
tags.push_back(lol);
tags.push_back(temp_words[outer_2_distribution(generator)]);
tags_1[0] = lol;
tags_1[1] = temp_words[outer_2_distribution(generator)];
// Words inside the <p> tag
for (unsigned short j = 0; j < 7; j++) {
temp_words = predict_next_word(tags[j], word_frequencies, predict_num);
temp_words = predict_next_word_2(tags_1[j], word_frequencies, predict_num);
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
tags.push_back(temp_words[inner_distribution(generator)]);
tags_1[j + 2] = temp_words[inner_distribution(generator)];
}
string temp_string_2 = "<a href=\"";
string temp_string;
for (const auto & tag : tags) {
for (const auto & tag : tags_1) {
temp_string += tag;
}
@ -141,48 +218,39 @@ string WordUtils::create_link(const unordered_map<string, unordered_map<string,
string WordUtils::create_image(const string& image) {
string temp_1 = R"(<img loading="lazy" src="/)";
temp_1 += image;
temp_1 += ".avif";
temp_1 += "\">";
temp_1 += ".avif\">";
return temp_1;
}
vector<string> WordUtils::split_string(const string& input, const data_type type) {
vector<string> data;
const char* delimiters = nullptr;
switch (type) {
case HTML: delimiters = " .,!?;:()\n"; break;
case CSS: delimiters = "}"; break;
case TEXT: delimiters = "\n"; break;
case IP: delimiters = "."; break;
default: delimiters = " "; break;
}
size_t start = 0;
size_t end = 0;
const size_t len = input.length();
string delimiter_string;
while (start < len) {
size_t end = input.find_first_of(delimiters, start);
if (type == HTML) {
delimiter_string = " .,!?;:()\n\r\t";
}
if (end == string::npos) end = len;
else if (type == CSS) {
delimiter_string = "}\n\r\t";
}
else if (type == TEXT) {
delimiter_string = "\n";
}
else if (type == IP) {
delimiter_string = ".";
}
while ((end = input.find_first_of(delimiter_string, start)) != string::npos) {
if (end > start) {
data.push_back(input.substr(start, end - start));
// Instead of allocating a new string with substr(), use string_view then copy only if needed.
data.emplace_back(input.begin() + start, input.begin() + end);
}
start = end + 1;
}
// Add the last token, if any
if (start < input.length()) {
data.push_back(input.substr(start));
}
return data;
}