diff --git a/include/ServerUtils.h b/include/ServerUtils.h index 582dbc0..f2b499c 100644 --- a/include/ServerUtils.h +++ b/include/ServerUtils.h @@ -20,14 +20,14 @@ class ServerUtils { static void send_data(int client_fd, const string& data); static void send_image(int client_fd, const string& path, image_type type); static size_t send_all(int client_fd, const char* data, size_t length); - static string get_ip(int client_fd); + static uint32_t get_ip_2(int client_fd); }; const string HTML_RESPONSE_HEADER = "HTTP/1.1 200 OK\r\n" "Content-Type: text/html; charset=utf-8\r\n" "Transfer-Encoding: chunked\r\n" - //"Cache-control: max-age=12000\r\n" + "Cache-control: max-age=12000\r\n" "Connection: close\r\n\r\n"; const string CSS_RESPONSE_HEADER = diff --git a/include/Track.h b/include/Track.h index 1d0594d..2486f0d 100644 --- a/include/Track.h +++ b/include/Track.h @@ -5,7 +5,7 @@ using namespace std; struct Track { - string Ip; + uint32_t Ip; string UserAgent; }; diff --git a/include/WordUtils.h b/include/WordUtils.h index a4ffd60..eae4325 100644 --- a/include/WordUtils.h +++ b/include/WordUtils.h @@ -12,6 +12,7 @@ struct WordUtils { static unordered_map> load_data(const string& path); static vector load_css(const string& path); static vector predict_next_word(const string& input, const unordered_map>& word_frequencies, size_t count); + static vector predict_next_word_2(const string& input, const unordered_map>& word_frequencies, size_t count); static string load_file(const string& path); static vector split_string(const string& input, data_type type); static string extract_url(const string& input); @@ -19,6 +20,7 @@ struct WordUtils { static bool contains_image(const string& input); static string extract_image_name(const string& input); static string create_tag(const unordered_map>& word_frequencies, const char& hash); + static string create_tag_2(const unordered_map>& word_frequencies, const char& hash); static string create_link(const unordered_map>& word_frequencies, unsigned long hash); static string create_image(const string& image); static unsigned int hash_url(const string& input); diff --git a/src/FileUtils.cpp b/src/FileUtils.cpp index 83bc79c..a2417df 100644 --- a/src/FileUtils.cpp +++ b/src/FileUtils.cpp @@ -9,13 +9,13 @@ bool FileUtils::fileExists(const char *path){ } vector FileUtils::open_image(const string& path) { - std::ifstream file(path, std::ios::binary); + ifstream file(path, ios::binary); if (!file) { - std::cerr << "Error opening file: " << path << std::endl; + cerr << "Error opening file: " << path << endl; return {}; // Return an empty vector on error } - std::vector data; + vector data; char c; while (file.get(c)) { @@ -28,7 +28,7 @@ vector FileUtils::open_image(const string& path) { vector FileUtils::get_image_list(const string& path) { vector images; - const std::string ext(".avif"); + const string ext(".avif"); for (auto &p : filesystem::recursive_directory_iterator(path)) { if (p.path().extension() == ext) { @@ -41,7 +41,7 @@ vector FileUtils::get_image_list(const string& path) { vector FileUtils::get_wordlists(const string& path) { vector images; - const std::string ext(".txt"); + const string ext(".txt"); for (auto &p : filesystem::recursive_directory_iterator(path)) { if (p.path().extension() == ext) { diff --git a/src/ServerUtils.cpp b/src/ServerUtils.cpp index b40d289..07e7ef4 100644 --- a/src/ServerUtils.cpp +++ b/src/ServerUtils.cpp @@ -16,6 +16,11 @@ #include #include #include +#include +#include +#include +#include +#include vector css; @@ -62,7 +67,7 @@ void [[noreturn]] ServerUtils::serve(shared_ptr> cq_track } void ServerUtils::process_request(const int client_fd) { - string ip = get_ip(client_fd); + const uint32_t ip = get_ip_2(client_fd); char buffer[1024]; const unsigned long bytes_received = recv(client_fd, buffer, sizeof(buffer) - 1, 0); @@ -93,18 +98,18 @@ void ServerUtils::process_request(const int client_fd) { if (url == "/style.css") { // This sends the header, that instructs how the browser should interpret the data. - send_header(client_fd, data_type::CSS); + send_header(client_fd, CSS); send_chunked_css(client_fd); } else if (url == "/favicon.png") { - send_image(client_fd, "/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/a.png", image_type::PNG); + send_image(client_fd, "/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/a.png", PNG); } else if (WordUtils::contains_image(url)) { string p = "/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/images/"; p += WordUtils::extract_image_name(url); - send_image(client_fd, p, image_type::AVIF); + send_image(client_fd, p, AVIF); } else { @@ -121,11 +126,11 @@ void ServerUtils::process_request(const int client_fd) { } void ServerUtils::send_header(const int client_fd, const data_type type) { - if (type == HTML) { + if (type == HTML) [[likely]] { send_data(client_fd, HTML_RESPONSE_HEADER); } - else if (type == CSS) { + else if (type == CSS) [[unlikely]] { send_data(client_fd, CSS_RESPONSE_HEADER); } } @@ -142,17 +147,17 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) { minstd_rand generator(hash); uniform_int_distribution distribution_1(0, end - 1); - uniform_int_distribution distribution_2(0, images.size() - 1); - uniform_int_distribution distribution_3(0, 8); // There is only 9 wordlists. - uniform_int_distribution distribution_4(0, end - 2); const int link = distribution_1(generator); - const int image = distribution_2(generator); - const int l = distribution_3(generator); - const int img = distribution_4(generator); + distribution_1.param(uniform_int_distribution::param_type(0, images.size() - 1)); + const int image = distribution_1(generator); + distribution_1.param(uniform_int_distribution::param_type(0, 8)); + const int l = distribution_1(generator); + distribution_1.param(uniform_int_distribution::param_type(0, end - 2)); + const int img = distribution_1(generator); while (itr < end) { - send_data(client_fd, WordUtils::create_tag(all_lists[l], hashes[itr])); + send_data(client_fd, WordUtils::create_tag_2(all_lists[l], hashes[itr])); if (itr == link) { send_data(client_fd, WordUtils::create_link(all_lists[l], hash)); @@ -176,29 +181,32 @@ void ServerUtils::send_chunked_css(const int client_fd) { for (const auto & cs : css) { send_data(client_fd, cs); - this_thread::sleep_for(chrono::milliseconds(25)); + //this_thread::sleep_for(chrono::milliseconds(25)); } } void ServerUtils::send_image(const int client_fd, const string& path, const image_type type) { const vector image = FileUtils::open_image(path); - std::ostringstream oss; - oss << "HTTP/1.1 200 OK\r\n"; + string s = "HTTP/1.1 200 OK\r\n"; + if (type == PNG) { - oss << "Content-Type: image/png\r\n"; + s += "Content-Type: image/png\r\n"; } if (type == AVIF) { - oss << "Content-Type: image/avif\r\n"; + s += "Content-Type: image/avif\r\n"; } - oss << "Content-Length: " << image.size() << "\r\n"; - oss << "\r\n"; + s += "Cache-control: max-age=12000\r\n"; + s += "Content-Length: "; + s += to_string(image.size()); + s += "\r\n"; + s += "\r\n"; - send(client_fd, oss.str().c_str(), oss.str().size(), 0); + send(client_fd, s.c_str(), s.size(), 0); - send(client_fd, reinterpret_cast(image.data()), image.size(), 0); + send(client_fd, image.data(), image.size(), 0); //send_all(client_fd,reinterpret_cast(image.data()), image.size()); } @@ -217,30 +225,29 @@ size_t ServerUtils::send_all(const int client_fd, const char *data, const size_t } void ServerUtils::send_data(const int client_fd, const string& data) { - ostringstream oss; + char size_hex[20]; // hold 64-bit hex value + snprintf(size_hex, sizeof(size_hex), "%zx", data.size()); - oss << hex << data.size() << "\r\n" << data << "\r\n"; + string message; + message.reserve(strlen(size_hex) + 4 + data.size()); + message += size_hex; + message += "\r\n"; + message += data; + message += "\r\n"; - send(client_fd, oss.str().c_str(), oss.str().size(), 0); + send(client_fd, message.c_str(), message.size(), 0); } -string ServerUtils::get_ip(const int client_fd) { +uint32_t ServerUtils::get_ip_2(const int client_fd) { sockaddr_storage addr{}; socklen_t len = sizeof(addr); - char buf[INET6_ADDRSTRLEN]; if (getpeername(client_fd, reinterpret_cast(&addr), &len) == -1) { perror("getpeername"); - return ""; + return 0; // Or some other error value. Could throw an exception. } - if (addr.ss_family == AF_INET) { - // IPv4 - const sockaddr_in* s = reinterpret_cast(&addr); - inet_ntop(AF_INET, &s->sin_addr, buf, sizeof(buf)); - } else { - return "Unknown address family"; - } + const sockaddr_in* s = reinterpret_cast(&addr); + return ntohl(s->sin_addr.s_addr); // Convert network byte order to host byte order - return buf; } \ No newline at end of file diff --git a/src/TrackerUtils.cpp b/src/TrackerUtils.cpp index 0177bb8..a1e6f6d 100644 --- a/src/TrackerUtils.cpp +++ b/src/TrackerUtils.cpp @@ -26,16 +26,16 @@ void [[noreturn]] TrackerUtils::track(const shared_ptr>& continue; } - vector temp = WordUtils::split_string(track.Ip, IP); + /*vector temp = WordUtils::split_string(track.Ip, IP); const int ip1 = stoi(temp[0]); const int ip2 = stoi(temp[1]); const int ip3 = stoi(temp[2]); const int ip4 = stoi(temp[3]); - const uint32_t ip = static_cast(ip1 << 24) | static_cast(ip2 << 16) | static_cast(ip3 << 8) | static_cast(ip4); + const uint32_t ip = static_cast(ip1 << 24) | static_cast(ip2 << 16) | static_cast(ip3 << 8) | static_cast(ip4);*/ - (*urls)[ip].links_pressed++; - (*urls)[ip].user_agent = track.UserAgent; + (*urls)[track.Ip].links_pressed++; + (*urls)[track.Ip].user_agent = track.UserAgent; } } diff --git a/src/WordUtils.cpp b/src/WordUtils.cpp index 703e248..55b57be 100644 --- a/src/WordUtils.cpp +++ b/src/WordUtils.cpp @@ -6,10 +6,14 @@ #include #include #include +#include +#include +#include +#include +#include -// We can just use an unordered_map> instead unordered_map> WordUtils::load_data(const string& path) { - vector data = split_string(load_file(path), data_type::HTML); + vector data = split_string(load_file(path), HTML); unordered_map> word_frequencies = {}; @@ -26,8 +30,7 @@ vector WordUtils::load_css(const string& path) { return split_string(load_file(path), data_type::CSS); } -// We can just use an unordered_map> instead, since we're already sorting before counting. -vector WordUtils::predict_next_word(const string& input, const unordered_map>& word_frequencies, size_t count) { +vector WordUtils::predict_next_word(const string& input, const unordered_map>& word_frequencies, const size_t count) { const auto it = word_frequencies.find(input); if (it == word_frequencies.end()) return {input}; @@ -51,12 +54,42 @@ vector WordUtils::predict_next_word(const string& input, const unordered return results; } +vector WordUtils::predict_next_word_2(const string& input, const unordered_map>& word_frequencies, const size_t count) { + const auto it = word_frequencies.find(input); + if (it == word_frequencies.end()) return {input}; + + const auto& nextWords = it->second; + + const size_t limit = min(count, nextWords.size()); + vector> topWords(limit); // preallocate exact size + + const auto middle = topWords.begin() + limit; + + // partial_sort_copy into preallocated vector + const auto end_it = partial_sort_copy( + nextWords.begin(), nextWords.end(), + topWords.begin(), middle, + [](const auto& a, const auto& b) { + return a.second > b.second; // sort descending + } + ); + + // Convert pairs to strings + vector results; + results.reserve(distance(topWords.begin(), end_it)); + for (auto it_ = topWords.begin(); it_ != end_it; ++it_) { + results.emplace_back(it_->first); + } + + return results; +} + string WordUtils::create_tag(const unordered_map>& word_frequencies, const char& hash) { constexpr unsigned char predict_num = 5; const string start_words[] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"}; - vector tags; + string tags_1[27]; minstd_rand generator(hash); @@ -64,25 +97,25 @@ string WordUtils::create_tag(const unordered_map temp_words = predict_next_word(start_words[outer_distribution(generator)], word_frequencies, predict_num); + vector temp_words = predict_next_word_2(lol, word_frequencies, predict_num); uniform_int_distribution outer_2_distribution(0, temp_words.size() - 1); - tags.push_back(lol); - tags.push_back(temp_words[outer_2_distribution(generator)]); + tags_1[0] = lol; + tags_1[1] = temp_words[outer_2_distribution(generator)]; // Words inside the

tag for (unsigned short j = 0; j < 25; j++) { - temp_words = predict_next_word(tags[j], word_frequencies, predict_num); + temp_words = predict_next_word_2(tags_1[j], word_frequencies, predict_num); uniform_int_distribution inner_distribution(0, temp_words.size() - 1); - tags.push_back(temp_words[inner_distribution(generator)]); + tags_1[j + 2] = temp_words[inner_distribution(generator)]; } string temp_string = "

"; - for (const auto & tag : tags) { + for (const auto & tag : tags_1) { temp_string += tag; temp_string += " "; } @@ -92,38 +125,82 @@ string WordUtils::create_tag(const unordered_map>& word_frequencies, const char& hash) { + constexpr unsigned char predict_num = 5; + + const string start_words[] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"}; + + string tags_1[27]; + + minstd_rand generator(hash); + + uniform_int_distribution start_dist(0, start_words->length() - 1); + const size_t first_index = start_dist(generator); + + const string& first_word = start_words[first_index]; + vector temp_words = predict_next_word_2(first_word, word_frequencies, predict_num); + + start_dist.param(uniform_int_distribution::param_type(0, temp_words.size() - 1)); + + tags_1[0] = first_word; + tags_1[1] = temp_words[start_dist(generator)]; + + uniform_int_distribution next_word_dist; // re-used, param set each loop + + // Words inside the

tag + for (unsigned short j = 0; j < 25; j++) { + temp_words = predict_next_word_2(tags_1[j], word_frequencies, predict_num); + + next_word_dist.param(uniform_int_distribution::param_type(0, temp_words.size() - 1)); + + tags_1[j + 2] = temp_words[next_word_dist(generator)]; + } + + string temp_string = "

"; + + for (const auto & tag : tags_1) { + temp_string += tag; + temp_string += " "; + } + + temp_string += ".

\n"; + + return temp_string; +} + + string WordUtils::create_link(const unordered_map>& word_frequencies, const unsigned long hash) { constexpr unsigned char predict_num = 10; const string start_words[9] = {"the", "but", "with", "all", "over", "and", "as", "a", "to"}; - vector tags; + string tags_1[9]; minstd_rand generator(hash); uniform_int_distribution outer_distribution(0, start_words->size() - 1); const string lol = start_words[outer_distribution(generator)]; - vector temp_words = predict_next_word(lol, word_frequencies, predict_num); + vector temp_words = predict_next_word_2(lol, word_frequencies, predict_num); uniform_int_distribution outer_2_distribution(0, temp_words.size() - 1); - tags.push_back(lol); - tags.push_back(temp_words[outer_2_distribution(generator)]); + tags_1[0] = lol; + tags_1[1] = temp_words[outer_2_distribution(generator)]; // Words inside the

tag for (unsigned short j = 0; j < 7; j++) { - temp_words = predict_next_word(tags[j], word_frequencies, predict_num); + temp_words = predict_next_word_2(tags_1[j], word_frequencies, predict_num); uniform_int_distribution inner_distribution(0, temp_words.size() - 1); - tags.push_back(temp_words[inner_distribution(generator)]); + tags_1[j + 2] = temp_words[inner_distribution(generator)]; } string temp_string_2 = ""; + temp_1 += ".avif\">"; return temp_1; } vector WordUtils::split_string(const string& input, const data_type type) { vector data; + const char* delimiters = nullptr; + + switch (type) { + case HTML: delimiters = " .,!?;:()\n"; break; + case CSS: delimiters = "}"; break; + case TEXT: delimiters = "\n"; break; + case IP: delimiters = "."; break; + default: delimiters = " "; break; + } + size_t start = 0; - size_t end = 0; + const size_t len = input.length(); - string delimiter_string; + while (start < len) { + size_t end = input.find_first_of(delimiters, start); - if (type == HTML) { - delimiter_string = " .,!?;:()\n\r\t"; - } + if (end == string::npos) end = len; - else if (type == CSS) { - delimiter_string = "}\n\r\t"; - } - - else if (type == TEXT) { - delimiter_string = "\n"; - } - - else if (type == IP) { - delimiter_string = "."; - } - - while ((end = input.find_first_of(delimiter_string, start)) != string::npos) { if (end > start) { - data.push_back(input.substr(start, end - start)); + // Instead of allocating a new string with substr(), use string_view then copy only if needed. + data.emplace_back(input.begin() + start, input.begin() + end); } start = end + 1; } - // Add the last token, if any - if (start < input.length()) { - data.push_back(input.substr(start)); - } - return data; }