There shouldn't be any memory leak anymore, and it's now super fast.
This commit is contained in:
parent
14279b5c06
commit
4cec4644fa
@ -20,14 +20,14 @@ class ServerUtils {
|
|||||||
static void send_data(int client_fd, const string& data);
|
static void send_data(int client_fd, const string& data);
|
||||||
static void send_image(int client_fd, const string& path, image_type type);
|
static void send_image(int client_fd, const string& path, image_type type);
|
||||||
static size_t send_all(int client_fd, const char* data, size_t length);
|
static size_t send_all(int client_fd, const char* data, size_t length);
|
||||||
static string get_ip(int client_fd);
|
static uint32_t get_ip_2(int client_fd);
|
||||||
};
|
};
|
||||||
|
|
||||||
const string HTML_RESPONSE_HEADER =
|
const string HTML_RESPONSE_HEADER =
|
||||||
"HTTP/1.1 200 OK\r\n"
|
"HTTP/1.1 200 OK\r\n"
|
||||||
"Content-Type: text/html; charset=utf-8\r\n"
|
"Content-Type: text/html; charset=utf-8\r\n"
|
||||||
"Transfer-Encoding: chunked\r\n"
|
"Transfer-Encoding: chunked\r\n"
|
||||||
//"Cache-control: max-age=12000\r\n"
|
"Cache-control: max-age=12000\r\n"
|
||||||
"Connection: close\r\n\r\n";
|
"Connection: close\r\n\r\n";
|
||||||
|
|
||||||
const string CSS_RESPONSE_HEADER =
|
const string CSS_RESPONSE_HEADER =
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
// One tracked request, passed through the ConcurrentQueue<Track> that the
// server and tracker sides share.
struct Track {
    // Client IPv4 address packed into 32 bits; the tracker uses it directly as
    // the key of its per-client table. Defaults to 0 so a default-constructed
    // Track never carries an indeterminate address.
    // NOTE(review): presumably host byte order, as produced by
    // ServerUtils::get_ip_2 — confirm at the producer.
    uint32_t Ip{0};

    // User-Agent string reported by the client; copied into the tracker entry.
    string UserAgent;
};
|
||||||
|
|
||||||
|
@ -12,6 +12,7 @@ struct WordUtils {
|
|||||||
static unordered_map<string, unordered_map<string, int>> load_data(const string& path);
|
static unordered_map<string, unordered_map<string, int>> load_data(const string& path);
|
||||||
static vector<string> load_css(const string& path);
|
static vector<string> load_css(const string& path);
|
||||||
static vector<string> predict_next_word(const string& input, const unordered_map<string, unordered_map<string, int>>& word_frequencies, size_t count);
|
static vector<string> predict_next_word(const string& input, const unordered_map<string, unordered_map<string, int>>& word_frequencies, size_t count);
|
||||||
|
static vector<string> predict_next_word_2(const string& input, const unordered_map<string, unordered_map<string, int>>& word_frequencies, size_t count);
|
||||||
static string load_file(const string& path);
|
static string load_file(const string& path);
|
||||||
static vector<string> split_string(const string& input, data_type type);
|
static vector<string> split_string(const string& input, data_type type);
|
||||||
static string extract_url(const string& input);
|
static string extract_url(const string& input);
|
||||||
@ -19,6 +20,7 @@ struct WordUtils {
|
|||||||
static bool contains_image(const string& input);
|
static bool contains_image(const string& input);
|
||||||
static string extract_image_name(const string& input);
|
static string extract_image_name(const string& input);
|
||||||
static string create_tag(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash);
|
static string create_tag(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash);
|
||||||
|
static string create_tag_2(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash);
|
||||||
static string create_link(const unordered_map<string, unordered_map<string, int>>& word_frequencies, unsigned long hash);
|
static string create_link(const unordered_map<string, unordered_map<string, int>>& word_frequencies, unsigned long hash);
|
||||||
static string create_image(const string& image);
|
static string create_image(const string& image);
|
||||||
static unsigned int hash_url(const string& input);
|
static unsigned int hash_url(const string& input);
|
||||||
|
@ -9,13 +9,13 @@ bool FileUtils::fileExists(const char *path){
|
|||||||
}
|
}
|
||||||
|
|
||||||
vector<unsigned char> FileUtils::open_image(const string& path) {
|
vector<unsigned char> FileUtils::open_image(const string& path) {
|
||||||
std::ifstream file(path, std::ios::binary);
|
ifstream file(path, ios::binary);
|
||||||
if (!file) {
|
if (!file) {
|
||||||
std::cerr << "Error opening file: " << path << std::endl;
|
cerr << "Error opening file: " << path << endl;
|
||||||
return {}; // Return an empty vector on error
|
return {}; // Return an empty vector on error
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<unsigned char> data;
|
vector<unsigned char> data;
|
||||||
char c;
|
char c;
|
||||||
|
|
||||||
while (file.get(c)) {
|
while (file.get(c)) {
|
||||||
@ -28,7 +28,7 @@ vector<unsigned char> FileUtils::open_image(const string& path) {
|
|||||||
|
|
||||||
vector<string> FileUtils::get_image_list(const string& path) {
|
vector<string> FileUtils::get_image_list(const string& path) {
|
||||||
vector<string> images;
|
vector<string> images;
|
||||||
const std::string ext(".avif");
|
const string ext(".avif");
|
||||||
for (auto &p : filesystem::recursive_directory_iterator(path))
|
for (auto &p : filesystem::recursive_directory_iterator(path))
|
||||||
{
|
{
|
||||||
if (p.path().extension() == ext) {
|
if (p.path().extension() == ext) {
|
||||||
@ -41,7 +41,7 @@ vector<string> FileUtils::get_image_list(const string& path) {
|
|||||||
|
|
||||||
vector<string> FileUtils::get_wordlists(const string& path) {
|
vector<string> FileUtils::get_wordlists(const string& path) {
|
||||||
vector<string> images;
|
vector<string> images;
|
||||||
const std::string ext(".txt");
|
const string ext(".txt");
|
||||||
for (auto &p : filesystem::recursive_directory_iterator(path))
|
for (auto &p : filesystem::recursive_directory_iterator(path))
|
||||||
{
|
{
|
||||||
if (p.path().extension() == ext) {
|
if (p.path().extension() == ext) {
|
||||||
|
@ -16,6 +16,11 @@
|
|||||||
#include <random>
|
#include <random>
|
||||||
#include <arpa/inet.h>
|
#include <arpa/inet.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
|
#include <arpa/inet.h>
|
||||||
|
#include <netinet/in.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
|
||||||
vector<string> css;
|
vector<string> css;
|
||||||
@ -62,7 +67,7 @@ void [[noreturn]] ServerUtils::serve(shared_ptr<ConcurrentQueue<Track>> cq_track
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ServerUtils::process_request(const int client_fd) {
|
void ServerUtils::process_request(const int client_fd) {
|
||||||
string ip = get_ip(client_fd);
|
const uint32_t ip = get_ip_2(client_fd);
|
||||||
|
|
||||||
char buffer[1024];
|
char buffer[1024];
|
||||||
const unsigned long bytes_received = recv(client_fd, buffer, sizeof(buffer) - 1, 0);
|
const unsigned long bytes_received = recv(client_fd, buffer, sizeof(buffer) - 1, 0);
|
||||||
@ -93,18 +98,18 @@ void ServerUtils::process_request(const int client_fd) {
|
|||||||
|
|
||||||
if (url == "/style.css") {
|
if (url == "/style.css") {
|
||||||
// This sends the header, that instructs how the browser should interpret the data.
|
// This sends the header, that instructs how the browser should interpret the data.
|
||||||
send_header(client_fd, data_type::CSS);
|
send_header(client_fd, CSS);
|
||||||
send_chunked_css(client_fd);
|
send_chunked_css(client_fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (url == "/favicon.png") {
|
else if (url == "/favicon.png") {
|
||||||
send_image(client_fd, "/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/a.png", image_type::PNG);
|
send_image(client_fd, "/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/a.png", PNG);
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (WordUtils::contains_image(url)) {
|
else if (WordUtils::contains_image(url)) {
|
||||||
string p = "/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/images/";
|
string p = "/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/images/";
|
||||||
p += WordUtils::extract_image_name(url);
|
p += WordUtils::extract_image_name(url);
|
||||||
send_image(client_fd, p, image_type::AVIF);
|
send_image(client_fd, p, AVIF);
|
||||||
}
|
}
|
||||||
|
|
||||||
else {
|
else {
|
||||||
@ -121,11 +126,11 @@ void ServerUtils::process_request(const int client_fd) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ServerUtils::send_header(const int client_fd, const data_type type) {
|
void ServerUtils::send_header(const int client_fd, const data_type type) {
|
||||||
if (type == HTML) {
|
if (type == HTML) [[likely]] {
|
||||||
send_data(client_fd, HTML_RESPONSE_HEADER);
|
send_data(client_fd, HTML_RESPONSE_HEADER);
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (type == CSS) {
|
else if (type == CSS) [[unlikely]] {
|
||||||
send_data(client_fd, CSS_RESPONSE_HEADER);
|
send_data(client_fd, CSS_RESPONSE_HEADER);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -142,17 +147,17 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) {
|
|||||||
|
|
||||||
minstd_rand generator(hash);
|
minstd_rand generator(hash);
|
||||||
uniform_int_distribution<unsigned short> distribution_1(0, end - 1);
|
uniform_int_distribution<unsigned short> distribution_1(0, end - 1);
|
||||||
uniform_int_distribution<unsigned short> distribution_2(0, images.size() - 1);
|
|
||||||
uniform_int_distribution<unsigned short> distribution_3(0, 8); // There is only 9 wordlists.
|
|
||||||
uniform_int_distribution<unsigned short> distribution_4(0, end - 2);
|
|
||||||
|
|
||||||
const int link = distribution_1(generator);
|
const int link = distribution_1(generator);
|
||||||
const int image = distribution_2(generator);
|
distribution_1.param(uniform_int_distribution<unsigned short>::param_type(0, images.size() - 1));
|
||||||
const int l = distribution_3(generator);
|
const int image = distribution_1(generator);
|
||||||
const int img = distribution_4(generator);
|
distribution_1.param(uniform_int_distribution<unsigned short>::param_type(0, 8));
|
||||||
|
const int l = distribution_1(generator);
|
||||||
|
distribution_1.param(uniform_int_distribution<unsigned short>::param_type(0, end - 2));
|
||||||
|
const int img = distribution_1(generator);
|
||||||
|
|
||||||
while (itr < end) {
|
while (itr < end) {
|
||||||
send_data(client_fd, WordUtils::create_tag(all_lists[l], hashes[itr]));
|
send_data(client_fd, WordUtils::create_tag_2(all_lists[l], hashes[itr]));
|
||||||
|
|
||||||
if (itr == link) {
|
if (itr == link) {
|
||||||
send_data(client_fd, WordUtils::create_link(all_lists[l], hash));
|
send_data(client_fd, WordUtils::create_link(all_lists[l], hash));
|
||||||
@ -176,29 +181,32 @@ void ServerUtils::send_chunked_css(const int client_fd) {
|
|||||||
for (const auto & cs : css) {
|
for (const auto & cs : css) {
|
||||||
send_data(client_fd, cs);
|
send_data(client_fd, cs);
|
||||||
|
|
||||||
this_thread::sleep_for(chrono::milliseconds(25));
|
//this_thread::sleep_for(chrono::milliseconds(25));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ServerUtils::send_image(const int client_fd, const string& path, const image_type type) {
|
void ServerUtils::send_image(const int client_fd, const string& path, const image_type type) {
|
||||||
const vector<unsigned char> image = FileUtils::open_image(path);
|
const vector<unsigned char> image = FileUtils::open_image(path);
|
||||||
|
|
||||||
std::ostringstream oss;
|
string s = "HTTP/1.1 200 OK\r\n";
|
||||||
oss << "HTTP/1.1 200 OK\r\n";
|
|
||||||
if (type == PNG) {
|
if (type == PNG) {
|
||||||
oss << "Content-Type: image/png\r\n";
|
s += "Content-Type: image/png\r\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type == AVIF) {
|
if (type == AVIF) {
|
||||||
oss << "Content-Type: image/avif\r\n";
|
s += "Content-Type: image/avif\r\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
oss << "Content-Length: " << image.size() << "\r\n";
|
s += "Cache-control: max-age=12000\r\n";
|
||||||
oss << "\r\n";
|
s += "Content-Length: ";
|
||||||
|
s += to_string(image.size());
|
||||||
|
s += "\r\n";
|
||||||
|
s += "\r\n";
|
||||||
|
|
||||||
send(client_fd, oss.str().c_str(), oss.str().size(), 0);
|
send(client_fd, s.c_str(), s.size(), 0);
|
||||||
|
|
||||||
send(client_fd, reinterpret_cast<const char*>(image.data()), image.size(), 0);
|
send(client_fd, image.data(), image.size(), 0);
|
||||||
//send_all(client_fd,reinterpret_cast<const char*>(image.data()), image.size());
|
//send_all(client_fd,reinterpret_cast<const char*>(image.data()), image.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -217,30 +225,29 @@ size_t ServerUtils::send_all(const int client_fd, const char *data, const size_t
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ServerUtils::send_data(const int client_fd, const string& data) {
|
void ServerUtils::send_data(const int client_fd, const string& data) {
|
||||||
ostringstream oss;
|
char size_hex[20]; // hold 64-bit hex value
|
||||||
|
snprintf(size_hex, sizeof(size_hex), "%zx", data.size());
|
||||||
|
|
||||||
oss << hex << data.size() << "\r\n" << data << "\r\n";
|
string message;
|
||||||
|
message.reserve(strlen(size_hex) + 4 + data.size());
|
||||||
|
message += size_hex;
|
||||||
|
message += "\r\n";
|
||||||
|
message += data;
|
||||||
|
message += "\r\n";
|
||||||
|
|
||||||
send(client_fd, oss.str().c_str(), oss.str().size(), 0);
|
send(client_fd, message.c_str(), message.size(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
string ServerUtils::get_ip(const int client_fd) {
|
uint32_t ServerUtils::get_ip_2(const int client_fd) {
|
||||||
sockaddr_storage addr{};
|
sockaddr_storage addr{};
|
||||||
socklen_t len = sizeof(addr);
|
socklen_t len = sizeof(addr);
|
||||||
char buf[INET6_ADDRSTRLEN];
|
|
||||||
|
|
||||||
if (getpeername(client_fd, reinterpret_cast<sockaddr *>(&addr), &len) == -1) {
|
if (getpeername(client_fd, reinterpret_cast<sockaddr *>(&addr), &len) == -1) {
|
||||||
perror("getpeername");
|
perror("getpeername");
|
||||||
return "";
|
return 0; // Or some other error value. Could throw an exception.
|
||||||
}
|
}
|
||||||
|
|
||||||
if (addr.ss_family == AF_INET) {
|
const sockaddr_in* s = reinterpret_cast<sockaddr_in *>(&addr);
|
||||||
// IPv4
|
return ntohl(s->sin_addr.s_addr); // Convert network byte order to host byte order
|
||||||
const sockaddr_in* s = reinterpret_cast<sockaddr_in *>(&addr);
|
|
||||||
inet_ntop(AF_INET, &s->sin_addr, buf, sizeof(buf));
|
|
||||||
} else {
|
|
||||||
return "Unknown address family";
|
|
||||||
}
|
|
||||||
|
|
||||||
return buf;
|
|
||||||
}
|
}
|
@ -26,16 +26,16 @@ void [[noreturn]] TrackerUtils::track(const shared_ptr<ConcurrentQueue<Track>>&
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<string> temp = WordUtils::split_string(track.Ip, IP);
|
/*vector<string> temp = WordUtils::split_string(track.Ip, IP);
|
||||||
const int ip1 = stoi(temp[0]);
|
const int ip1 = stoi(temp[0]);
|
||||||
const int ip2 = stoi(temp[1]);
|
const int ip2 = stoi(temp[1]);
|
||||||
const int ip3 = stoi(temp[2]);
|
const int ip3 = stoi(temp[2]);
|
||||||
const int ip4 = stoi(temp[3]);
|
const int ip4 = stoi(temp[3]);
|
||||||
|
|
||||||
const uint32_t ip = static_cast<uint32_t>(ip1 << 24) | static_cast<uint32_t>(ip2 << 16) | static_cast<uint32_t>(ip3 << 8) | static_cast<uint32_t>(ip4);
|
const uint32_t ip = static_cast<uint32_t>(ip1 << 24) | static_cast<uint32_t>(ip2 << 16) | static_cast<uint32_t>(ip3 << 8) | static_cast<uint32_t>(ip4);*/
|
||||||
|
|
||||||
(*urls)[ip].links_pressed++;
|
(*urls)[track.Ip].links_pressed++;
|
||||||
(*urls)[ip].user_agent = track.UserAgent;
|
(*urls)[track.Ip].user_agent = track.UserAgent;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,10 +6,14 @@
|
|||||||
#include <cctype>
|
#include <cctype>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <random>
|
#include <random>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <iterator>
|
||||||
|
|
||||||
// We can just use an unordered_map<string, unordered_map<string, int>> instead
|
|
||||||
unordered_map<string, unordered_map<string, int>> WordUtils::load_data(const string& path) {
|
unordered_map<string, unordered_map<string, int>> WordUtils::load_data(const string& path) {
|
||||||
vector<string> data = split_string(load_file(path), data_type::HTML);
|
vector<string> data = split_string(load_file(path), HTML);
|
||||||
|
|
||||||
unordered_map<string, unordered_map<string, int>> word_frequencies = {};
|
unordered_map<string, unordered_map<string, int>> word_frequencies = {};
|
||||||
|
|
||||||
@ -26,8 +30,7 @@ vector<string> WordUtils::load_css(const string& path) {
|
|||||||
return split_string(load_file(path), data_type::CSS);
|
return split_string(load_file(path), data_type::CSS);
|
||||||
}
|
}
|
||||||
|
|
||||||
// We can just use an unordered_map<string, unordered_map<string, int>> instead, since we're already sorting before counting.
|
vector<string> WordUtils::predict_next_word(const string& input, const unordered_map<string, unordered_map<string, int>>& word_frequencies, const size_t count) {
|
||||||
vector<string> WordUtils::predict_next_word(const string& input, const unordered_map<string, unordered_map<string, int>>& word_frequencies, size_t count) {
|
|
||||||
const auto it = word_frequencies.find(input);
|
const auto it = word_frequencies.find(input);
|
||||||
|
|
||||||
if (it == word_frequencies.end()) return {input};
|
if (it == word_frequencies.end()) return {input};
|
||||||
@ -51,12 +54,42 @@ vector<string> WordUtils::predict_next_word(const string& input, const unordered
|
|||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns up to `count` of the words that most frequently follow `input`,
// ordered by descending frequency.
//
// input:            word to look up.
// word_frequencies: word -> (following word -> occurrence count).
// count:            maximum number of candidates to return.
//
// The result is never empty: when `input` is unknown, has no recorded
// followers, or `count` is 0, the single-element vector {input} is returned.
// Callers in this file pick a random index in [0, size() - 1], which is
// undefined for an empty vector.
// The relative order of words with equal frequency is unspecified.
vector<string> WordUtils::predict_next_word_2(const string& input, const unordered_map<string, unordered_map<string, int>>& word_frequencies, const size_t count) {
    const auto it = word_frequencies.find(input);
    if (it == word_frequencies.end()) return {input};

    const auto& followers = it->second;

    const size_t limit = min(count, followers.size());
    if (limit == 0) return {input};

    // Exactly `limit` slots; partial_sort_copy fills all of them because
    // limit <= followers.size().
    vector<pair<string, int>> top_words(limit);

    partial_sort_copy(
        followers.begin(), followers.end(),
        top_words.begin(), top_words.end(),
        [](const auto& a, const auto& b) {
            return a.second > b.second; // highest frequency first
        }
    );

    vector<string> results;
    results.reserve(limit);
    for (auto& entry : top_words) {
        // top_words is a private copy, so the strings can be moved out.
        results.push_back(std::move(entry.first));
    }

    return results;
}
|
||||||
|
|
||||||
string WordUtils::create_tag(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash) {
|
string WordUtils::create_tag(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash) {
|
||||||
constexpr unsigned char predict_num = 5;
|
constexpr unsigned char predict_num = 5;
|
||||||
|
|
||||||
const string start_words[] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"};
|
const string start_words[] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"};
|
||||||
|
|
||||||
vector<string> tags;
|
string tags_1[27];
|
||||||
|
|
||||||
minstd_rand generator(hash);
|
minstd_rand generator(hash);
|
||||||
|
|
||||||
@ -64,25 +97,25 @@ string WordUtils::create_tag(const unordered_map<string, unordered_map<string, i
|
|||||||
|
|
||||||
const string lol = start_words[outer_distribution(generator)];
|
const string lol = start_words[outer_distribution(generator)];
|
||||||
|
|
||||||
vector<string> temp_words = predict_next_word(start_words[outer_distribution(generator)], word_frequencies, predict_num);
|
vector<string> temp_words = predict_next_word_2(lol, word_frequencies, predict_num);
|
||||||
|
|
||||||
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
|
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
|
||||||
|
|
||||||
tags.push_back(lol);
|
tags_1[0] = lol;
|
||||||
tags.push_back(temp_words[outer_2_distribution(generator)]);
|
tags_1[1] = temp_words[outer_2_distribution(generator)];
|
||||||
|
|
||||||
// Words inside the <p> tag
|
// Words inside the <p> tag
|
||||||
for (unsigned short j = 0; j < 25; j++) {
|
for (unsigned short j = 0; j < 25; j++) {
|
||||||
temp_words = predict_next_word(tags[j], word_frequencies, predict_num);
|
temp_words = predict_next_word_2(tags_1[j], word_frequencies, predict_num);
|
||||||
|
|
||||||
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
|
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
|
||||||
|
|
||||||
tags.push_back(temp_words[inner_distribution(generator)]);
|
tags_1[j + 2] = temp_words[inner_distribution(generator)];
|
||||||
}
|
}
|
||||||
|
|
||||||
string temp_string = "<p>";
|
string temp_string = "<p>";
|
||||||
|
|
||||||
for (const auto & tag : tags) {
|
for (const auto & tag : tags_1) {
|
||||||
temp_string += tag;
|
temp_string += tag;
|
||||||
temp_string += " ";
|
temp_string += " ";
|
||||||
}
|
}
|
||||||
@ -92,38 +125,82 @@ string WordUtils::create_tag(const unordered_map<string, unordered_map<string, i
|
|||||||
return temp_string;
|
return temp_string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
string WordUtils::create_tag_2(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash) {
|
||||||
|
constexpr unsigned char predict_num = 5;
|
||||||
|
|
||||||
|
const string start_words[] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"};
|
||||||
|
|
||||||
|
string tags_1[27];
|
||||||
|
|
||||||
|
minstd_rand generator(hash);
|
||||||
|
|
||||||
|
uniform_int_distribution<size_t> start_dist(0, start_words->length() - 1);
|
||||||
|
const size_t first_index = start_dist(generator);
|
||||||
|
|
||||||
|
const string& first_word = start_words[first_index];
|
||||||
|
vector<string> temp_words = predict_next_word_2(first_word, word_frequencies, predict_num);
|
||||||
|
|
||||||
|
start_dist.param(uniform_int_distribution<size_t>::param_type(0, temp_words.size() - 1));
|
||||||
|
|
||||||
|
tags_1[0] = first_word;
|
||||||
|
tags_1[1] = temp_words[start_dist(generator)];
|
||||||
|
|
||||||
|
uniform_int_distribution<size_t> next_word_dist; // re-used, param set each loop
|
||||||
|
|
||||||
|
// Words inside the <p> tag
|
||||||
|
for (unsigned short j = 0; j < 25; j++) {
|
||||||
|
temp_words = predict_next_word_2(tags_1[j], word_frequencies, predict_num);
|
||||||
|
|
||||||
|
next_word_dist.param(uniform_int_distribution<size_t>::param_type(0, temp_words.size() - 1));
|
||||||
|
|
||||||
|
tags_1[j + 2] = temp_words[next_word_dist(generator)];
|
||||||
|
}
|
||||||
|
|
||||||
|
string temp_string = "<p>";
|
||||||
|
|
||||||
|
for (const auto & tag : tags_1) {
|
||||||
|
temp_string += tag;
|
||||||
|
temp_string += " ";
|
||||||
|
}
|
||||||
|
|
||||||
|
temp_string += ".</p>\n";
|
||||||
|
|
||||||
|
return temp_string;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
string WordUtils::create_link(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const unsigned long hash) {
|
string WordUtils::create_link(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const unsigned long hash) {
|
||||||
constexpr unsigned char predict_num = 10;
|
constexpr unsigned char predict_num = 10;
|
||||||
|
|
||||||
const string start_words[9] = {"the", "but", "with", "all", "over", "and", "as", "a", "to"};
|
const string start_words[9] = {"the", "but", "with", "all", "over", "and", "as", "a", "to"};
|
||||||
|
|
||||||
vector<string> tags;
|
string tags_1[9];
|
||||||
|
|
||||||
minstd_rand generator(hash);
|
minstd_rand generator(hash);
|
||||||
|
|
||||||
uniform_int_distribution<unsigned short> outer_distribution(0, start_words->size() - 1);
|
uniform_int_distribution<unsigned short> outer_distribution(0, start_words->size() - 1);
|
||||||
|
|
||||||
const string lol = start_words[outer_distribution(generator)];
|
const string lol = start_words[outer_distribution(generator)];
|
||||||
vector<string> temp_words = predict_next_word(lol, word_frequencies, predict_num);
|
vector<string> temp_words = predict_next_word_2(lol, word_frequencies, predict_num);
|
||||||
|
|
||||||
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
|
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
|
||||||
|
|
||||||
tags.push_back(lol);
|
tags_1[0] = lol;
|
||||||
tags.push_back(temp_words[outer_2_distribution(generator)]);
|
tags_1[1] = temp_words[outer_2_distribution(generator)];
|
||||||
|
|
||||||
// Words inside the <p> tag
|
// Words inside the <p> tag
|
||||||
for (unsigned short j = 0; j < 7; j++) {
|
for (unsigned short j = 0; j < 7; j++) {
|
||||||
temp_words = predict_next_word(tags[j], word_frequencies, predict_num);
|
temp_words = predict_next_word_2(tags_1[j], word_frequencies, predict_num);
|
||||||
|
|
||||||
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
|
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
|
||||||
|
|
||||||
tags.push_back(temp_words[inner_distribution(generator)]);
|
tags_1[j + 2] = temp_words[inner_distribution(generator)];
|
||||||
}
|
}
|
||||||
|
|
||||||
string temp_string_2 = "<a href=\"";
|
string temp_string_2 = "<a href=\"";
|
||||||
string temp_string;
|
string temp_string;
|
||||||
|
|
||||||
for (const auto & tag : tags) {
|
for (const auto & tag : tags_1) {
|
||||||
temp_string += tag;
|
temp_string += tag;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -141,48 +218,39 @@ string WordUtils::create_link(const unordered_map<string, unordered_map<string,
|
|||||||
// Builds a lazily-loaded <img> tag whose source is "/<image>.avif".
// `image` is inserted verbatim, without any escaping.
string WordUtils::create_image(const string& image) {
    string tag(R"(<img loading="lazy" src="/)");
    tag.append(image);
    tag.append(".avif\">");
    return tag;
}
|
||||||
|
|
||||||
vector<string> WordUtils::split_string(const string& input, const data_type type) {
|
vector<string> WordUtils::split_string(const string& input, const data_type type) {
|
||||||
vector<string> data;
|
vector<string> data;
|
||||||
|
|
||||||
|
const char* delimiters = nullptr;
|
||||||
|
|
||||||
|
switch (type) {
|
||||||
|
case HTML: delimiters = " .,!?;:()\n"; break;
|
||||||
|
case CSS: delimiters = "}"; break;
|
||||||
|
case TEXT: delimiters = "\n"; break;
|
||||||
|
case IP: delimiters = "."; break;
|
||||||
|
default: delimiters = " "; break;
|
||||||
|
}
|
||||||
|
|
||||||
size_t start = 0;
|
size_t start = 0;
|
||||||
size_t end = 0;
|
const size_t len = input.length();
|
||||||
|
|
||||||
string delimiter_string;
|
while (start < len) {
|
||||||
|
size_t end = input.find_first_of(delimiters, start);
|
||||||
|
|
||||||
if (type == HTML) {
|
if (end == string::npos) end = len;
|
||||||
delimiter_string = " .,!?;:()\n\r\t";
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (type == CSS) {
|
|
||||||
delimiter_string = "}\n\r\t";
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (type == TEXT) {
|
|
||||||
delimiter_string = "\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (type == IP) {
|
|
||||||
delimiter_string = ".";
|
|
||||||
}
|
|
||||||
|
|
||||||
while ((end = input.find_first_of(delimiter_string, start)) != string::npos) {
|
|
||||||
if (end > start) {
|
if (end > start) {
|
||||||
data.push_back(input.substr(start, end - start));
|
// Instead of allocating a new string with substr(), use string_view then copy only if needed.
|
||||||
|
data.emplace_back(input.begin() + start, input.begin() + end);
|
||||||
}
|
}
|
||||||
|
|
||||||
start = end + 1;
|
start = end + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add the last token, if any
|
|
||||||
if (start < input.length()) {
|
|
||||||
data.push_back(input.substr(start));
|
|
||||||
}
|
|
||||||
|
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user