Everything 'should' just work

This commit is contained in:
Rasmus Rasmussen 2025-05-21 15:09:16 +02:00
parent 50e6f227a0
commit 6f43ab070e
9 changed files with 34 additions and 135 deletions

View File

@ -14,7 +14,8 @@ using namespace std;
// Groups the static entry points of the metrics exporter. The struct carries no
// state of its own; it only declares functions that are defined out of line.
struct MetricsExporter {
// Serving entry point of the exporter. `crawler` is a shared map keyed by a
// 32-bit unsigned integer (presumably the client IP - confirm against the caller).
static void serve(shared_ptr<unordered_map<uint32_t, Crawler>> crawler);
// Handles one client connection identified by the socket descriptor `client_fd`.
static void process_request(int client_fd);
// Sends `data` to the socket identified by `client_fd`.
static void send_data(int client_fd, const string& data);
};
inline string HEAD = "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r\n";
#endif

View File

@ -19,8 +19,7 @@ class ServerUtils {
static void send_css(int client_fd);
static void send_data(int client_fd, const string& data);
static void send_image(int client_fd, const string& path, image_type type);
static size_t send_all(int client_fd, const char* data, size_t length);
static uint32_t get_ip_2(int client_fd);
static uint32_t get_ip(int client_fd);
};
const string HTML_RESPONSE_HEADER =

View File

@ -9,4 +9,4 @@ struct Track {
string UserAgent;
};
#endif //TRACK_H
#endif

View File

@ -12,7 +12,6 @@ struct WordUtils {
static unordered_map<string, unordered_map<string, int>> load_data(const string& path);
static string load_css(const string& path);
static vector<string> predict_next_word(const string& input, const unordered_map<string, unordered_map<string, int>>& word_frequencies, size_t count);
static vector<string> predict_next_word_2(const string& input, const unordered_map<string, unordered_map<string, int>>& word_frequencies, size_t count);
static string load_file(const string& path);
static vector<string> split_string(const string& input, data_type type);
static string extract_url(const string& input);
@ -20,7 +19,6 @@ struct WordUtils {
static bool contains_image(const string& input);
static string extract_image_name(const string& input);
static string create_tag(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash);
static string create_tag_2(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash);
static string create_link(const unordered_map<string, unordered_map<string, int>>& word_frequencies, unsigned long hash);
static string create_image(const string& image);
static unsigned int hash_url(const string& input);

View File

@ -29,8 +29,7 @@ vector<unsigned char> FileUtils::open_image(const string& path) {
vector<string> FileUtils::get_image_list(const string& path) {
vector<string> images;
const string ext(".avif");
for (auto &p : filesystem::recursive_directory_iterator(path))
{
for (auto &p : filesystem::recursive_directory_iterator(path)) {
if (p.path().extension() == ext) {
images.push_back(p.path().stem().string());
}
@ -42,8 +41,7 @@ vector<string> FileUtils::get_image_list(const string& path) {
vector<string> FileUtils::get_wordlists(const string& path) {
vector<string> images;
const string ext(".txt");
for (auto &p : filesystem::recursive_directory_iterator(path))
{
for (auto &p : filesystem::recursive_directory_iterator(path)) {
if (p.path().extension() == ext) {
images.push_back(p.path().string());
}

View File

@ -46,11 +46,9 @@ void [[noreturn]] MetricsExporter::serve(shared_ptr<unordered_map<uint32_t, Craw
}
void MetricsExporter::process_request(const int client_fd) {
send_data(client_fd, "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r\n");
ostringstream oss;
oss << "links_pressed_total: ";
oss << HEAD;
int total = 0;
@ -58,16 +56,10 @@ void MetricsExporter::process_request(const int client_fd) {
total += links_pressed;
}
oss << total << "\r\n";
oss << "# HELP links_pressed_total Total number of links pressed.\n";
oss << "# TYPE links_pressed_total counter\n";
oss << "links_pressed_total " << total << "\n";
send(client_fd, oss.str().c_str(), oss.str().size(), 0);
close(client_fd);
}
void MetricsExporter::send_data(const int client_fd, const string& data) {
ostringstream oss;
oss << hex << data.size() << "\r\n" << data << "\r\n";
send(client_fd, oss.str().c_str(), oss.str().size(), 0);
}

View File

@ -17,7 +17,6 @@
#include <sys/socket.h>
#include <cstdint>
string css;
vector<string> images;
vector<unordered_map<string, unordered_map<string, int>>> all_lists;
@ -64,7 +63,7 @@ void [[noreturn]] ServerUtils::serve(shared_ptr<ConcurrentQueue<Track>> cq_track
}
void ServerUtils::process_request(const int client_fd) {
const uint32_t ip = get_ip_2(client_fd);
const uint32_t ip = get_ip(client_fd);
char buffer[1024];
const unsigned long bytes_received = recv(client_fd, buffer, sizeof(buffer) - 1, 0);
@ -84,26 +83,24 @@ void ServerUtils::process_request(const int client_fd) {
}
} else {
close(client_fd);
cerr << "AAAA \n";
cerr << "BBBB \n";
}
if (url == "/style.css") {
// This sends the header, that instructs how the browser should interpret the data.
//send_header(client_fd, CSS);
if (url == "/style.css") [[unlikely]] {
send_css(client_fd);
}
else if (url == "/favicon.png") {
else if (url == "/favicon.png") [[unlikely]] {
send_image(client_fd, options_.favicon, PNG);
}
else if (WordUtils::contains_image(url)) {
else if (WordUtils::contains_image(url)) [[unlikely]] {
string p = options_.images;
p += WordUtils::extract_image_name(url);
send_image(client_fd, p, AVIF);
}
else {
else [[likely]] {
Track track;
track.Ip = ip;
track.UserAgent = user_agent;
@ -112,7 +109,6 @@ void ServerUtils::process_request(const int client_fd) {
const unsigned long hash3 = WordUtils::fnv1aHash(url);
//send_header(client_fd, HTML);
send_html(client_fd, hash3);
}
@ -144,17 +140,14 @@ void ServerUtils::send_html(const int client_fd, const size_t hash) {
html += HTML_MAIN_1;
while (itr < end) {
html += WordUtils::create_tag_2(all_lists[l], hashes[itr]);
//send_data(client_fd, WordUtils::create_tag_2(all_lists[l], hashes[itr]));
html += WordUtils::create_tag(all_lists[l], hashes[itr]);
if (itr == link) {
html += WordUtils::create_link(all_lists[l], hash);
//send_data(client_fd, WordUtils::create_link(all_lists[l], hash));
}
if (itr == img) {
html += WordUtils::create_image(images[image]);
//send_data(client_fd, WordUtils::create_image(images[image]));
}
itr++;
@ -176,11 +169,11 @@ void ServerUtils::send_image(const int client_fd, const string& path, const imag
string s = "HTTP/1.1 200 OK\r\n";
if (type == PNG) {
if (type == PNG) [[unlikely]] {
s += "Content-Type: image/png\r\n";
}
if (type == AVIF) {
if (type == AVIF) [[likely]] {
s += "Content-Type: image/avif\r\n";
}
@ -193,34 +186,18 @@ void ServerUtils::send_image(const int client_fd, const string& path, const imag
send(client_fd, s.c_str(), s.size(), 0);
send(client_fd, image.data(), image.size(), 0);
//send_all(client_fd,reinterpret_cast<const char*>(image.data()), image.size());
}
/// Writes `length` bytes starting at `data` to `client_fd`, calling send()
/// repeatedly until everything has been accepted or send() stops making
/// progress.
///
/// The result of send() is kept in a signed type: storing it in a size_t would
/// turn the -1 error return into a huge positive value, defeat the error check
/// and corrupt the running total.
///
/// @param client_fd Connected socket descriptor to write to.
/// @param data      Pointer to the first byte to send.
/// @param length    Number of bytes to send.
/// @return Number of bytes actually handed to the socket. A value smaller than
///         `length` means send() failed or the connection was closed early.
size_t ServerUtils::send_all(const int client_fd, const char *data, const size_t length) {
    size_t total_sent = 0;
    while (total_sent < length) {
        const ssize_t sent = send(client_fd, data + total_sent, length - total_sent, 0);
        if (sent <= 0) {
            break;  // Error or connection closed; report what was sent so far.
        }
        total_sent += static_cast<size_t>(sent);
    }
    return total_sent;
}
// Hands the bytes of `data` to the socket `client_fd` with one send() call.
// The result of send() is not inspected.
void ServerUtils::send_data(const int client_fd, const string& data) {
    const char* const payload = data.c_str();
    const size_t payload_size = data.size();
    send(client_fd, payload, payload_size, 0);
}
uint32_t ServerUtils::get_ip_2(const int client_fd) {
uint32_t ServerUtils::get_ip(const int client_fd) {
sockaddr_storage addr{};
socklen_t len = sizeof(addr);
if (getpeername(client_fd, reinterpret_cast<sockaddr *>(&addr), &len) == -1) {
perror("getpeername");
return 0; // Or some other error value. Could throw an exception.
return 0;
}
const sockaddr_in* s = reinterpret_cast<sockaddr_in *>(&addr);

View File

@ -32,30 +32,6 @@ string WordUtils::load_css(const string& path) {
/// Returns the most frequent follower words recorded for `input`.
///
/// @param input            Word whose followers are looked up.
/// @param word_frequencies Map from a word to its followers and their counts.
/// @param count            Maximum number of followers to return.
/// @return Up to `count` follower words ordered by descending count. If `input`
///         has no entry in `word_frequencies`, a vector holding only `input`
///         is returned.
vector<string> WordUtils::predict_next_word(const string& input, const unordered_map<string, unordered_map<string, int>>& word_frequencies, const size_t count) {
    const auto it = word_frequencies.find(input);
    if (it == word_frequencies.end()) return {input};

    // Bind by reference: copying the follower map on every call is pure overhead,
    // the entries are copied once into the sortable vector below anyway.
    const auto& next_words = it->second;
    vector<pair<string, int>> ranked(next_words.begin(), next_words.end());

    // Sort by frequency (descending).
    ranges::sort(ranked, [](const auto& a, const auto& b) {
        return a.second > b.second;
    });

    // Take up to "count" most common words.
    const size_t take = min(count, ranked.size());
    vector<string> results;
    results.reserve(take);
    for (size_t i = 0; i < take; ++i) {
        // `ranked` is a local scratch vector, so its strings can be moved out.
        results.push_back(std::move(ranked[i].first));
    }
    return results;
}
vector<string> WordUtils::predict_next_word_2(const string& input, const unordered_map<string, unordered_map<string, int>>& word_frequencies, const size_t count) {
const auto it = word_frequencies.find(input);
if (it == word_frequencies.end()) return {input};
const auto& nextWords = it->second;
@ -93,52 +69,11 @@ string WordUtils::create_tag(const unordered_map<string, unordered_map<string, i
minstd_rand generator(hash);
uniform_int_distribution<unsigned short> outer_distribution(0, start_words->length() - 1);
const string lol = start_words[outer_distribution(generator)];
vector<string> temp_words = predict_next_word_2(lol, word_frequencies, predict_num);
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
tags_1[0] = lol;
tags_1[1] = temp_words[outer_2_distribution(generator)];
// Words inside the <p> tag
for (unsigned short j = 0; j < 25; j++) {
temp_words = predict_next_word_2(tags_1[j], word_frequencies, predict_num);
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
tags_1[j + 2] = temp_words[inner_distribution(generator)];
}
string temp_string = "<p>";
for (const auto & tag : tags_1) {
temp_string += tag;
temp_string += " ";
}
temp_string += ".</p>\n";
return temp_string;
}
string WordUtils::create_tag_2(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash) {
constexpr unsigned char predict_num = 5;
const string start_words[] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"};
string tags_1[27];
minstd_rand generator(hash);
uniform_int_distribution<size_t> start_dist(0, start_words->length() - 1);
const size_t first_index = start_dist(generator);
const string& first_word = start_words[first_index];
vector<string> temp_words = predict_next_word_2(first_word, word_frequencies, predict_num);
vector<string> temp_words = predict_next_word(first_word, word_frequencies, predict_num);
start_dist.param(uniform_int_distribution<size_t>::param_type(0, temp_words.size() - 1));
@ -149,7 +84,7 @@ string WordUtils::create_tag_2(const unordered_map<string, unordered_map<string,
// Words inside the <p> tag
for (unsigned short j = 0; j < 25; j++) {
temp_words = predict_next_word_2(tags_1[j], word_frequencies, predict_num);
temp_words = predict_next_word(tags_1[j], word_frequencies, predict_num);
next_word_dist.param(uniform_int_distribution<size_t>::param_type(0, temp_words.size() - 1));
@ -181,7 +116,7 @@ string WordUtils::create_link(const unordered_map<string, unordered_map<string,
uniform_int_distribution<unsigned short> outer_distribution(0, start_words->size() - 1);
const string lol = start_words[outer_distribution(generator)];
vector<string> temp_words = predict_next_word_2(lol, word_frequencies, predict_num);
vector<string> temp_words = predict_next_word(lol, word_frequencies, predict_num);
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
@ -190,7 +125,7 @@ string WordUtils::create_link(const unordered_map<string, unordered_map<string,
// Words inside the <p> tag
for (unsigned short j = 0; j < 7; j++) {
temp_words = predict_next_word_2(tags_1[j], word_frequencies, predict_num);
temp_words = predict_next_word(tags_1[j], word_frequencies, predict_num);
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
@ -241,10 +176,9 @@ vector<string> WordUtils::split_string(const string& input, const data_type type
while (start < len) {
size_t end = input.find_first_of(delimiters, start);
if (end == string::npos) end = len;
if (end == string::npos) [[unlikely]] end = len;
if (end > start) {
// Instead of allocating a new string with substr(), use string_view then copy only if needed.
if (end > start) [[likely]] {
data.emplace_back(input.begin() + start, input.begin() + end);
}
@ -272,26 +206,26 @@ string WordUtils::extract_url(const string& input) {
if (input.empty()) return "";
const unsigned short first_line_end = input.find('\n');
if (first_line_end == string::npos) return "";
if (first_line_end == string::npos) [[unlikely]] return "";
string first_line = input.substr(0, first_line_end);
const unsigned short method_end = first_line.find(' ');
if (method_end == string::npos) return "";
if (method_end == string::npos) [[unlikely]] return "";
const unsigned short path_end = first_line.find(' ', method_end + 1);
if (path_end == string::npos) return "";
if (path_end == string::npos) [[unlikely]] return "";
return first_line.substr(method_end + 1, path_end - method_end - 1);
}
string WordUtils::extract_user_agent(const string& input) {
if (input.empty()) return "";
if (input.empty()) [[unlikely]] return "";
vector<string> lines = split_string(input, TEXT);
const unsigned short first_line_end = lines[2].find('\n');
if (first_line_end == string::npos) return "";
if (first_line_end == string::npos) [[unlikely]] return "";
string first_line = lines[2].substr(12, first_line_end);
@ -301,7 +235,7 @@ string WordUtils::extract_user_agent(const string& input) {
string WordUtils::extract_image_name(const string& input) {
const unsigned short first_line_end = input.find('f');
if (first_line_end == string::npos) return "";
if (first_line_end == string::npos) [[unlikely]] return "";
string first_line = input.substr(1, first_line_end);
@ -311,7 +245,7 @@ string WordUtils::extract_image_name(const string& input) {
bool WordUtils::contains_image(const string& input) {
const unsigned short first_line_end = input.find('.');
if (first_line_end == 65535) return false;
if (first_line_end == 65535) [[unlikely]] return false;
string type = input.substr(first_line_end + 1, 5);

BIN
tarpit Executable file

Binary file not shown.