diff --git a/include/MetricsExporter.h b/include/MetricsExporter.h index 761ecf9..c776487 100644 --- a/include/MetricsExporter.h +++ b/include/MetricsExporter.h @@ -14,7 +14,8 @@ using namespace std; struct MetricsExporter { static void serve(shared_ptr> crawler); static void process_request(int client_fd); - static void send_data(int client_fd, const string& data); }; +inline string HEAD = "HTTP/1.1 200 OK\r\nContent-Type: text/plain; version=0.0.4\r\n\r\n"; + #endif diff --git a/include/ServerUtils.h b/include/ServerUtils.h index b1f54ef..a7dd566 100644 --- a/include/ServerUtils.h +++ b/include/ServerUtils.h @@ -19,8 +19,7 @@ class ServerUtils { static void send_css(int client_fd); static void send_data(int client_fd, const string& data); static void send_image(int client_fd, const string& path, image_type type); - static size_t send_all(int client_fd, const char* data, size_t length); - static uint32_t get_ip_2(int client_fd); + static uint32_t get_ip(int client_fd); }; const string HTML_RESPONSE_HEADER = diff --git a/include/Track.h b/include/Track.h index 2486f0d..2f9d5ee 100644 --- a/include/Track.h +++ b/include/Track.h @@ -9,4 +9,4 @@ struct Track { string UserAgent; }; -#endif //TRACK_H +#endif diff --git a/include/WordUtils.h b/include/WordUtils.h index 578c7ab..60c5133 100644 --- a/include/WordUtils.h +++ b/include/WordUtils.h @@ -12,7 +12,6 @@ struct WordUtils { static unordered_map> load_data(const string& path); static string load_css(const string& path); static vector predict_next_word(const string& input, const unordered_map>& word_frequencies, size_t count); - static vector predict_next_word_2(const string& input, const unordered_map>& word_frequencies, size_t count); static string load_file(const string& path); static vector split_string(const string& input, data_type type); static string extract_url(const string& input); @@ -20,7 +19,6 @@ struct WordUtils { static bool contains_image(const string& input); static string extract_image_name(const string& input); static string create_tag(const unordered_map>& word_frequencies, const char& hash); - static string create_tag_2(const unordered_map>& word_frequencies, const char& hash); static string create_link(const unordered_map>& word_frequencies, unsigned long hash); static string create_image(const string& image); static unsigned int hash_url(const string& input); diff --git a/src/FileUtils.cpp b/src/FileUtils.cpp index a2417df..2309be2 100644 --- a/src/FileUtils.cpp +++ b/src/FileUtils.cpp @@ -29,8 +29,7 @@ vector FileUtils::open_image(const string& path) { vector FileUtils::get_image_list(const string& path) { vector images; const string ext(".avif"); - for (auto &p : filesystem::recursive_directory_iterator(path)) - { + for (auto &p : filesystem::recursive_directory_iterator(path)) { if (p.path().extension() == ext) { images.push_back(p.path().stem().string()); } @@ -42,8 +41,7 @@ vector FileUtils::get_image_list(const string& path) { vector FileUtils::get_wordlists(const string& path) { vector images; const string ext(".txt"); - for (auto &p : filesystem::recursive_directory_iterator(path)) - { + for (auto &p : filesystem::recursive_directory_iterator(path)) { if (p.path().extension() == ext) { images.push_back(p.path().string()); } diff --git a/src/MetricsExporter.cpp b/src/MetricsExporter.cpp index 60afbaf..1e72317 100644 --- a/src/MetricsExporter.cpp +++ b/src/MetricsExporter.cpp @@ -46,11 +46,9 @@ void [[noreturn]] MetricsExporter::serve(shared_ptr #include - string css; vector images; vector>> all_lists; @@ -64,7 +63,7 @@ void [[noreturn]] ServerUtils::serve(shared_ptr> cq_track } void ServerUtils::process_request(const int client_fd) { - const uint32_t ip = get_ip_2(client_fd); + const uint32_t ip = get_ip(client_fd); char buffer[1024]; const unsigned long bytes_received = recv(client_fd, buffer, sizeof(buffer) - 1, 0); @@ -84,26 +83,24 @@ void ServerUtils::process_request(const int client_fd) { } } else { close(client_fd); - cerr << "AAAA \n"; + cerr << "BBBB \n"; } - if (url == "/style.css") { - // This sends the header, that instructs how the browser should interpret the data. - //send_header(client_fd, CSS); + if (url == "/style.css") [[unlikely]] { send_css(client_fd); } - else if (url == "/favicon.png") { + else if (url == "/favicon.png") [[unlikely]] { send_image(client_fd, options_.favicon, PNG); } - else if (WordUtils::contains_image(url)) { + else if (WordUtils::contains_image(url)) [[unlikely]] { string p = options_.images; p += WordUtils::extract_image_name(url); send_image(client_fd, p, AVIF); } - else { + else [[likely]] { Track track; track.Ip = ip; track.UserAgent = user_agent; @@ -112,7 +109,6 @@ void ServerUtils::process_request(const int client_fd) { const unsigned long hash3 = WordUtils::fnv1aHash(url); - //send_header(client_fd, HTML); send_html(client_fd, hash3); } @@ -144,17 +140,14 @@ void ServerUtils::send_html(const int client_fd, const size_t hash) { html += HTML_MAIN_1; while (itr < end) { - html += WordUtils::create_tag_2(all_lists[l], hashes[itr]); - //send_data(client_fd, WordUtils::create_tag_2(all_lists[l], hashes[itr])); + html += WordUtils::create_tag(all_lists[l], hashes[itr]); if (itr == link) { html += WordUtils::create_link(all_lists[l], hash); - //send_data(client_fd, WordUtils::create_link(all_lists[l], hash)); } if (itr == img) { html += WordUtils::create_image(images[image]); - //send_data(client_fd, WordUtils::create_image(images[image])); } itr++; @@ -176,11 +169,11 @@ void ServerUtils::send_image(const int client_fd, const string& path, const imag string s = "HTTP/1.1 200 OK\r\n"; - if (type == PNG) { + if (type == PNG) [[unlikely]] { s += "Content-Type: image/png\r\n"; } - if (type == AVIF) { + if (type == AVIF) [[likely]] { s += "Content-Type: image/avif\r\n"; } @@ -193,34 +186,18 @@ void ServerUtils::send_image(const int client_fd, const string& path, const imag send(client_fd, s.c_str(), s.size(), 0); send(client_fd, image.data(), image.size(), 0); - //send_all(client_fd,reinterpret_cast(image.data()), image.size()); } - -size_t ServerUtils::send_all(const int client_fd, const char *data, const size_t length) { - size_t total_sent = 0; - while (total_sent < length) { - const size_t sent = send(client_fd, data + total_sent, length - total_sent, 0); - if (sent <= 0) { - return sent; // Error or connection closed - } - - total_sent += sent; - } - - return total_sent; -} - void ServerUtils::send_data(const int client_fd, const string& data) { send(client_fd, data.c_str(), data.size(), 0); } -uint32_t ServerUtils::get_ip_2(const int client_fd) { +uint32_t ServerUtils::get_ip(const int client_fd) { sockaddr_storage addr{}; socklen_t len = sizeof(addr); if (getpeername(client_fd, reinterpret_cast(&addr), &len) == -1) { perror("getpeername"); - return 0; // Or some other error value. Could throw an exception. + return 0; } const sockaddr_in* s = reinterpret_cast(&addr); diff --git a/src/WordUtils.cpp b/src/WordUtils.cpp index 177f3e2..34f6caa 100644 --- a/src/WordUtils.cpp +++ b/src/WordUtils.cpp @@ -32,30 +32,6 @@ string WordUtils::load_css(const string& path) { vector WordUtils::predict_next_word(const string& input, const unordered_map>& word_frequencies, const size_t count) { const auto it = word_frequencies.find(input); - - if (it == word_frequencies.end()) return {input}; - - const unordered_map nextWords = it->second; - - vector> sortedWords(nextWords.begin(), nextWords.end()); - - // Sort by frequency (descending) - ranges::sort(sortedWords,[](const auto& a, const auto& b) { - return a.second > b.second; - }); - - vector results; - - // Take up to "count" most common words - for (size_t i = 0; i < min(count, sortedWords.size()); ++i) { - results.push_back(sortedWords[i].first); - } - - return results; -} - -vector WordUtils::predict_next_word_2(const string& input, const unordered_map>& word_frequencies, const size_t count) { - const auto it = word_frequencies.find(input); if (it == word_frequencies.end()) return {input}; const auto& nextWords = it->second; @@ -93,52 +69,11 @@ string WordUtils::create_tag(const unordered_map outer_distribution(0, start_words->length() - 1); - - const string lol = start_words[outer_distribution(generator)]; - - vector temp_words = predict_next_word_2(lol, word_frequencies, predict_num); - - uniform_int_distribution outer_2_distribution(0, temp_words.size() - 1); - - tags_1[0] = lol; - tags_1[1] = temp_words[outer_2_distribution(generator)]; - - // Words inside the

tag - for (unsigned short j = 0; j < 25; j++) { - temp_words = predict_next_word_2(tags_1[j], word_frequencies, predict_num); - - uniform_int_distribution inner_distribution(0, temp_words.size() - 1); - - tags_1[j + 2] = temp_words[inner_distribution(generator)]; - } - - string temp_string = "

"; - - for (const auto & tag : tags_1) { - temp_string += tag; - temp_string += " "; - } - - temp_string += ".

\n"; - - return temp_string; -} - -string WordUtils::create_tag_2(const unordered_map>& word_frequencies, const char& hash) { - constexpr unsigned char predict_num = 5; - - const string start_words[] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"}; - - string tags_1[27]; - - minstd_rand generator(hash); - uniform_int_distribution start_dist(0, start_words->length() - 1); const size_t first_index = start_dist(generator); const string& first_word = start_words[first_index]; - vector temp_words = predict_next_word_2(first_word, word_frequencies, predict_num); + vector temp_words = predict_next_word(first_word, word_frequencies, predict_num); start_dist.param(uniform_int_distribution::param_type(0, temp_words.size() - 1)); @@ -149,7 +84,7 @@ string WordUtils::create_tag_2(const unordered_map tag for (unsigned short j = 0; j < 25; j++) { - temp_words = predict_next_word_2(tags_1[j], word_frequencies, predict_num); + temp_words = predict_next_word(tags_1[j], word_frequencies, predict_num); next_word_dist.param(uniform_int_distribution::param_type(0, temp_words.size() - 1)); @@ -181,7 +116,7 @@ string WordUtils::create_link(const unordered_map outer_distribution(0, start_words->size() - 1); const string lol = start_words[outer_distribution(generator)]; - vector temp_words = predict_next_word_2(lol, word_frequencies, predict_num); + vector temp_words = predict_next_word(lol, word_frequencies, predict_num); uniform_int_distribution outer_2_distribution(0, temp_words.size() - 1); @@ -190,7 +125,7 @@ string WordUtils::create_link(const unordered_map tag for (unsigned short j = 0; j < 7; j++) { - temp_words = predict_next_word_2(tags_1[j], word_frequencies, predict_num); + temp_words = predict_next_word(tags_1[j], word_frequencies, predict_num); uniform_int_distribution inner_distribution(0, temp_words.size() - 1); @@ -241,10 +176,9 @@ vector WordUtils::split_string(const string& input, const data_type type while (start < len) { size_t end = input.find_first_of(delimiters, start); - if (end == string::npos) end = len; + if (end == string::npos) [[unlikely]] end = len; - if (end > start) { - // Instead of allocating a new string with substr(), use string_view then copy only if needed. + if (end > start) [[likely]] { data.emplace_back(input.begin() + start, input.begin() + end); } @@ -272,26 +206,26 @@ string WordUtils::extract_url(const string& input) { if (input.empty()) return ""; const unsigned short first_line_end = input.find('\n'); - if (first_line_end == string::npos) return ""; + if (first_line_end == string::npos) [[unlikely]] return ""; string first_line = input.substr(0, first_line_end); const unsigned short method_end = first_line.find(' '); - if (method_end == string::npos) return ""; + if (method_end == string::npos) [[unlikely]] return ""; const unsigned short path_end = first_line.find(' ', method_end + 1); - if (path_end == string::npos) return ""; + if (path_end == string::npos) [[unlikely]] return ""; return first_line.substr(method_end + 1, path_end - method_end - 1); } string WordUtils::extract_user_agent(const string& input) { - if (input.empty()) return ""; + if (input.empty()) [[unlikely]] return ""; vector lines = split_string(input, TEXT); const unsigned short first_line_end = lines[2].find('\n'); - if (first_line_end == string::npos) return ""; + if (first_line_end == string::npos) [[unlikely]] return ""; string first_line = lines[2].substr(12, first_line_end); @@ -301,7 +235,7 @@ string WordUtils::extract_user_agent(const string& input) { string WordUtils::extract_image_name(const string& input) { const unsigned short first_line_end = input.find('f'); - if (first_line_end == string::npos) return ""; + if (first_line_end == string::npos) [[unlikely]] return ""; string first_line = input.substr(1, first_line_end); @@ -311,7 +245,7 @@ string WordUtils::extract_image_name(const string& input) { bool WordUtils::contains_image(const string& input) { const unsigned short first_line_end = input.find('.'); - if (first_line_end == 65535) return false; + if (first_line_end == 65535) [[unlikely]] return false; string type = input.substr(first_line_end + 1, 5); diff --git a/tarpit b/tarpit new file mode 100755 index 0000000..3bb2a68 Binary files /dev/null and b/tarpit differ