Somewhat production-ready now. It's a bit slow: with 14 clients hammering the server, CPU usage reaches 80%, with a 16 MB memory footprint.
This commit is contained in:
parent
18581ed9ac
commit
14279b5c06
27
Makefile
27
Makefile
@ -153,6 +153,30 @@ src/FileUtils.cpp.s:
|
||||
$(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/FileUtils.cpp.s
|
||||
.PHONY : src/FileUtils.cpp.s
|
||||
|
||||
src/MetricsExporter.o: src/MetricsExporter.cpp.o
|
||||
.PHONY : src/MetricsExporter.o
|
||||
|
||||
# target to build an object file
|
||||
src/MetricsExporter.cpp.o:
|
||||
$(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/MetricsExporter.cpp.o
|
||||
.PHONY : src/MetricsExporter.cpp.o
|
||||
|
||||
src/MetricsExporter.i: src/MetricsExporter.cpp.i
|
||||
.PHONY : src/MetricsExporter.i
|
||||
|
||||
# target to preprocess a source file
|
||||
src/MetricsExporter.cpp.i:
|
||||
$(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/MetricsExporter.cpp.i
|
||||
.PHONY : src/MetricsExporter.cpp.i
|
||||
|
||||
src/MetricsExporter.s: src/MetricsExporter.cpp.s
|
||||
.PHONY : src/MetricsExporter.s
|
||||
|
||||
# target to generate assembly for a file
|
||||
src/MetricsExporter.cpp.s:
|
||||
$(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/MetricsExporter.cpp.s
|
||||
.PHONY : src/MetricsExporter.cpp.s
|
||||
|
||||
src/ServerUtils.o: src/ServerUtils.cpp.o
|
||||
.PHONY : src/ServerUtils.o
|
||||
|
||||
@ -261,6 +285,9 @@ help:
|
||||
@echo "... src/FileUtils.o"
|
||||
@echo "... src/FileUtils.i"
|
||||
@echo "... src/FileUtils.s"
|
||||
@echo "... src/MetricsExporter.o"
|
||||
@echo "... src/MetricsExporter.i"
|
||||
@echo "... src/MetricsExporter.s"
|
||||
@echo "... src/ServerUtils.o"
|
||||
@echo "... src/ServerUtils.i"
|
||||
@echo "... src/ServerUtils.s"
|
||||
|
@ -10,9 +10,9 @@ using namespace std;
|
||||
template <typename T>
|
||||
class ConcurrentQueue {
|
||||
private:
|
||||
std::queue<T> queue_;
|
||||
mutable std::mutex mutex_;
|
||||
std::condition_variable condition_;
|
||||
queue<T> queue_;
|
||||
mutable mutex mutex_;
|
||||
condition_variable condition_;
|
||||
|
||||
public:
|
||||
// Apparently, if you have a mutex in a class, you can't copy or assign the class to any other class.
|
||||
@ -49,6 +49,11 @@ public:
|
||||
lock_guard<mutex> lock(mutex_);
|
||||
return queue_.empty();
|
||||
}
|
||||
|
||||
int size() {
|
||||
lock_guard<mutex> lock(mutex_);
|
||||
return queue_.size();
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
13
include/Crawler.h
Normal file
13
include/Crawler.h
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef CRAWLER_H
|
||||
#define CRAWLER_H
|
||||
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// One tracked client: the User-Agent it last sent and how many requests it has made.
struct Crawler {
    // Most recent User-Agent string recorded for this client.
    string user_agent;

    // Request counter. Zero-initialised so a default-constructed Crawler never
    // carries an indeterminate value.
    int links_pressed = 0;
};
|
||||
|
||||
#endif
|
@ -4,7 +4,9 @@
|
||||
enum data_type {
|
||||
HTML,
|
||||
CSS,
|
||||
IMAGE
|
||||
IMAGE,
|
||||
TEXT,
|
||||
IP
|
||||
};
|
||||
|
||||
enum image_type {
|
||||
|
34
include/MetricsExporter.h
Normal file
34
include/MetricsExporter.h
Normal file
@ -0,0 +1,34 @@
|
||||
//
|
||||
// Created by skingging on 5/16/25.
|
||||
//
|
||||
|
||||
#ifndef METRICSEXPORTER_H
|
||||
#define METRICSEXPORTER_H
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "../include/Crawler.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Small HTTP endpoint that reports the per-client crawler statistics.
// All members are static; the struct is used as a namespace.
struct MetricsExporter {
|
||||
// Stores the shared crawler map, listens on TCP port 8889 and handles each
// accepted connection on its own detached thread.
static void serve(shared_ptr<unordered_map<uint32_t, Crawler>> crawler);
|
||||
// Writes one HTML page listing every crawler entry to client_fd, then closes it.
static void process_request(int client_fd);
|
||||
// Sends `data` to client_fd framed as a single HTTP chunk ("<hex size>\r\n<data>\r\n").
static void send_data(int client_fd, const string& data);
|
||||
};
|
||||
|
||||
const string HTML_RESPONSE_HEADER_lol =
|
||||
"HTTP/1.1 200 OK\r\n"
|
||||
"Content-Type: text/html; charset=utf-8\r\n"
|
||||
"Transfer-Encoding: chunked\r\n"
|
||||
//"Cache-control: max-age=12000\r\n"
|
||||
"Connection: close\r\n\r\n";
|
||||
|
||||
const string BEGINNING =
|
||||
"<!DOCTYPE html>\n<html><head>\r\n"
|
||||
"<title>Drip</title>\r\n"
|
||||
"</head><body>\r\n";
|
||||
|
||||
const string END = "</body></html>\r\n";
|
||||
|
||||
#endif
|
@ -11,7 +11,7 @@ using namespace std;
|
||||
|
||||
class ServerUtils {
|
||||
public:
|
||||
static void serve(shared_ptr<ConcurrentQueue<Track>> t_test);
|
||||
static void serve(shared_ptr<ConcurrentQueue<Track>> cq_track);
|
||||
private:
|
||||
static void process_request(int client_fd);
|
||||
static void send_header(int client_fd, data_type type);
|
||||
@ -27,7 +27,7 @@ const string HTML_RESPONSE_HEADER =
|
||||
"HTTP/1.1 200 OK\r\n"
|
||||
"Content-Type: text/html; charset=utf-8\r\n"
|
||||
"Transfer-Encoding: chunked\r\n"
|
||||
"Cache-control: max-age=12000\r\n"
|
||||
//"Cache-control: max-age=12000\r\n"
|
||||
"Connection: close\r\n\r\n";
|
||||
|
||||
const string CSS_RESPONSE_HEADER =
|
||||
|
@ -4,13 +4,14 @@
|
||||
#include <unordered_map>
|
||||
|
||||
#include "../include/ConcurrentQueue.h"
|
||||
#include "../include/Crawler.h"
|
||||
#include "../include/Track.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
struct TrackerUtils {
|
||||
static void track(const shared_ptr<ConcurrentQueue<Track>>& t_test);
|
||||
static void print(unordered_map<string, int> tracks);
|
||||
static void track(const shared_ptr<ConcurrentQueue<Track>>& s_track, const shared_ptr<unordered_map<uint32_t, Crawler>>& crawler);
|
||||
static void print();
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -15,6 +15,7 @@ struct WordUtils {
|
||||
static string load_file(const string& path);
|
||||
static vector<string> split_string(const string& input, data_type type);
|
||||
static string extract_url(const string& input);
|
||||
static string extract_user_agent(const string& input);
|
||||
static bool contains_image(const string& input);
|
||||
static string extract_image_name(const string& input);
|
||||
static string create_tag(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash);
|
||||
|
72
src/MetricsExporter.cpp
Normal file
72
src/MetricsExporter.cpp
Normal file
@ -0,0 +1,72 @@
|
||||
#include "../include/MetricsExporter.h"
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <unistd.h>
|
||||
#include <netinet/in.h>
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <random>
|
||||
#include <ranges>
|
||||
#include <sys/socket.h>
|
||||
|
||||
shared_ptr<unordered_map<uint32_t, Crawler>> crawler_;
|
||||
|
||||
void [[noreturn]] MetricsExporter::serve(shared_ptr<unordered_map<uint32_t, Crawler>> crawler) {
|
||||
crawler_ = move(crawler);
|
||||
|
||||
// server_fd is a file descriptor.
|
||||
const int server_fd = socket(AF_INET, SOCK_STREAM, 0);
|
||||
|
||||
sockaddr_in addr{};
|
||||
addr.sin_family = AF_INET;
|
||||
addr.sin_port = htons(8889);
|
||||
addr.sin_addr.s_addr = INADDR_ANY;
|
||||
|
||||
if (bind(server_fd, reinterpret_cast<sockaddr *>(&addr), sizeof(addr)) != 0) {
|
||||
cout << "Please wait for the pipe to close.";
|
||||
return;
|
||||
}
|
||||
|
||||
listen(server_fd, 50);
|
||||
|
||||
cout << "Server is running on http://localhost:8889 \n";
|
||||
|
||||
while (true) {
|
||||
int client_fd = accept(server_fd, nullptr, nullptr);
|
||||
|
||||
thread(process_request, client_fd).detach();
|
||||
}
|
||||
|
||||
close(server_fd);
|
||||
}
|
||||
|
||||
// Answers one metrics connection and closes it.
//
// The response is the HTTP header followed by a chunked HTML body containing one
// "<links_pressed>: <user_agent>" line per entry in the shared crawler map.
// The incoming request is not read; every connection gets the same page.
void MetricsExporter::process_request(const int client_fd) {
    // User-Agent strings are supplied by remote clients. Escape them so they
    // cannot inject markup or script into the metrics page.
    const auto escape_html = [](const string& raw) {
        string safe;
        safe.reserve(raw.size());

        for (const char c : raw) {
            switch (c) {
                case '&':  safe += "&amp;";  break;
                case '<':  safe += "&lt;";   break;
                case '>':  safe += "&gt;";   break;
                case '"':  safe += "&quot;"; break;
                case '\'': safe += "&#39;";  break;
                default:   safe += c;        break;
            }
        }

        return safe;
    };

    // The status line and headers must go out verbatim. Only the body is
    // chunk-framed, so the header is not passed through send_data.
    send(client_fd, HTML_RESPONSE_HEADER_lol.data(), HTML_RESPONSE_HEADER_lol.size(), 0);

    ostringstream oss;

    oss << BEGINNING;

    // NOTE(review): the same map is modified by TrackerUtils::track on another
    // thread and no lock is visible here. Confirm whether this read needs
    // synchronisation.
    for (const auto& [ip, entry] : *crawler_) {
        oss << entry.links_pressed << ": " << escape_html(entry.user_agent) << '\n';
    }

    oss << END;

    send_data(client_fd, oss.str());

    // Zero-length chunk: marks the end of the chunked body.
    send(client_fd, "0\r\n\r\n", 5, 0);
    close(client_fd);
}
|
||||
|
||||
// Sends `data` to client_fd as a single HTTP/1.1 chunk: "<hex length>\r\n<data>\r\n".
//
// Empty input is skipped, because a zero-length chunk is the end-of-body marker
// and would terminate the response early. Delivery is best-effort: if the peer
// has gone away the function returns without reporting an error.
void MetricsExporter::send_data(const int client_fd, const string& data) {
    if (data.empty()) return;

    ostringstream oss;

    oss << hex << data.size() << "\r\n" << data << "\r\n";

    // Materialise the frame exactly once so the pointer and the length refer to the same buffer.
    const string frame = oss.str();

    // send() may accept fewer bytes than requested; keep going until the whole frame is out.
    size_t sent = 0;

    while (sent < frame.size()) {
        const ssize_t written = send(client_fd, frame.data() + sent, frame.size() - sent, 0);

        if (written <= 0) return;  // socket error or closed peer: nothing more can be delivered

        sent += static_cast<size_t>(written);
    }
}
|
@ -21,10 +21,10 @@
|
||||
vector<string> css;
|
||||
vector<string> images;
|
||||
vector<unordered_map<string, unordered_map<string, int>>> all_lists;
|
||||
shared_ptr<ConcurrentQueue<Track>> t_test;
|
||||
shared_ptr<ConcurrentQueue<Track>> tracks;
|
||||
|
||||
void [[noreturn]] ServerUtils::serve(shared_ptr<ConcurrentQueue<Track>> test) {
|
||||
t_test = std::move(test);
|
||||
void [[noreturn]] ServerUtils::serve(shared_ptr<ConcurrentQueue<Track>> cq_track) {
|
||||
tracks = std::move(cq_track);
|
||||
css = WordUtils::load_css("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/style.css");
|
||||
images = FileUtils::get_image_list("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/");
|
||||
const vector<string> words = FileUtils::get_wordlists("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/wordlist/");
|
||||
@ -53,6 +53,8 @@ void [[noreturn]] ServerUtils::serve(shared_ptr<ConcurrentQueue<Track>> test) {
|
||||
while (true) {
|
||||
int client_fd = accept(server_fd, nullptr, nullptr);
|
||||
|
||||
if (client_fd == -1) continue;
|
||||
//cerr << client_fd << endl;
|
||||
thread(process_request, client_fd).detach();
|
||||
}
|
||||
|
||||
@ -60,24 +62,35 @@ void [[noreturn]] ServerUtils::serve(shared_ptr<ConcurrentQueue<Track>> test) {
|
||||
}
|
||||
|
||||
void ServerUtils::process_request(const int client_fd) {
|
||||
string ip = get_ip(client_fd);
|
||||
|
||||
char buffer[1024];
|
||||
const unsigned long bytes_received = recv(client_fd, buffer, sizeof(buffer) - 1, 0);
|
||||
|
||||
string url;
|
||||
string user_agent;
|
||||
|
||||
if (bytes_received > 0) {
|
||||
buffer[bytes_received] = '\0';
|
||||
|
||||
url = WordUtils::extract_url(string(buffer));
|
||||
user_agent = WordUtils::extract_user_agent(string(buffer));
|
||||
|
||||
if (url.empty()) {
|
||||
close(client_fd);
|
||||
cerr << "AAAA \n";
|
||||
}
|
||||
} else {
|
||||
close(client_fd);
|
||||
cerr << "AAAA \n";
|
||||
}
|
||||
|
||||
Track track;
|
||||
track.Ip = ip;
|
||||
track.UserAgent = user_agent;
|
||||
|
||||
tracks->push(track);
|
||||
|
||||
if (url == "/style.css") {
|
||||
// This sends the header, that instructs how the browser should interpret the data.
|
||||
send_header(client_fd, data_type::CSS);
|
||||
@ -95,13 +108,6 @@ void ServerUtils::process_request(const int client_fd) {
|
||||
}
|
||||
|
||||
else {
|
||||
Track track;
|
||||
track.Ip = get_ip(client_fd);
|
||||
|
||||
// Testing only.
|
||||
track.UserAgent = url;
|
||||
t_test->push(track);
|
||||
|
||||
const unsigned long hash3 = WordUtils::fnv1aHash(url);
|
||||
|
||||
send_header(client_fd, HTML);
|
||||
@ -137,11 +143,13 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) {
|
||||
minstd_rand generator(hash);
|
||||
uniform_int_distribution<unsigned short> distribution_1(0, end - 1);
|
||||
uniform_int_distribution<unsigned short> distribution_2(0, images.size() - 1);
|
||||
uniform_int_distribution<unsigned short> distribution_3(0, 8);
|
||||
uniform_int_distribution<unsigned short> distribution_3(0, 8); // There is only 9 wordlists.
|
||||
uniform_int_distribution<unsigned short> distribution_4(0, end - 2);
|
||||
|
||||
const int link = distribution_1(generator);
|
||||
const int image = distribution_2(generator);
|
||||
const int l = distribution_3(generator);
|
||||
const int img = distribution_4(generator);
|
||||
|
||||
while (itr < end) {
|
||||
send_data(client_fd, WordUtils::create_tag(all_lists[l], hashes[itr]));
|
||||
@ -150,11 +158,11 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) {
|
||||
send_data(client_fd, WordUtils::create_link(all_lists[l], hash));
|
||||
}
|
||||
|
||||
if (itr == link) {
|
||||
if (itr == img) {
|
||||
send_data(client_fd, WordUtils::create_image(images[image]));
|
||||
}
|
||||
|
||||
this_thread::sleep_for(chrono::milliseconds(25));
|
||||
//this_thread::sleep_for(chrono::milliseconds(75));
|
||||
|
||||
itr++;
|
||||
}
|
||||
@ -165,8 +173,8 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) {
|
||||
}
|
||||
|
||||
void ServerUtils::send_chunked_css(const int client_fd) {
|
||||
for (size_t i = 0; i < css.size(); i++) {
|
||||
send_data(client_fd, css[i]);
|
||||
for (const auto & cs : css) {
|
||||
send_data(client_fd, cs);
|
||||
|
||||
this_thread::sleep_for(chrono::milliseconds(25));
|
||||
}
|
||||
@ -219,7 +227,7 @@ void ServerUtils::send_data(const int client_fd, const string& data) {
|
||||
string ServerUtils::get_ip(const int client_fd) {
|
||||
sockaddr_storage addr{};
|
||||
socklen_t len = sizeof(addr);
|
||||
char ipstr[INET6_ADDRSTRLEN];
|
||||
char buf[INET6_ADDRSTRLEN];
|
||||
|
||||
if (getpeername(client_fd, reinterpret_cast<sockaddr *>(&addr), &len) == -1) {
|
||||
perror("getpeername");
|
||||
@ -228,15 +236,11 @@ string ServerUtils::get_ip(const int client_fd) {
|
||||
|
||||
if (addr.ss_family == AF_INET) {
|
||||
// IPv4
|
||||
sockaddr_in* s = reinterpret_cast<sockaddr_in *>(&addr);
|
||||
inet_ntop(AF_INET, &s->sin_addr, ipstr, sizeof(ipstr));
|
||||
} else if (addr.ss_family == AF_INET6) {
|
||||
// IPv6
|
||||
sockaddr_in6* s = reinterpret_cast<sockaddr_in6 *>(&addr);
|
||||
inet_ntop(AF_INET6, &s->sin6_addr, ipstr, sizeof(ipstr));
|
||||
const sockaddr_in* s = reinterpret_cast<sockaddr_in *>(&addr);
|
||||
inet_ntop(AF_INET, &s->sin_addr, buf, sizeof(buf));
|
||||
} else {
|
||||
return "Unknown address family";
|
||||
}
|
||||
|
||||
return string(ipstr);
|
||||
return buf;
|
||||
}
|
@ -1,34 +1,53 @@
|
||||
#include "../include/TrackerUtils.h"
|
||||
#include "../include/ConcurrentQueue.h"
|
||||
#include "../include/TrackerUtils.h"
|
||||
#include "../include/WordUtils.h"
|
||||
#include "../include/Crawler.h"
|
||||
#include "../include/Track.h"
|
||||
#include <iostream>
|
||||
#include <unordered_map>
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
#include <utility>
|
||||
#include <ranges>
|
||||
#include <cstdint>
|
||||
|
||||
void TrackerUtils::track(const shared_ptr<ConcurrentQueue<Track>> &t_test) {
|
||||
bool running = true;
|
||||
unordered_map<string, int> urls;
|
||||
shared_ptr<unordered_map<uint32_t, Crawler>> urls;
|
||||
|
||||
void [[noreturn]] TrackerUtils::track(const shared_ptr<ConcurrentQueue<Track>>& s_track, const shared_ptr<unordered_map<uint32_t, Crawler>>& crawler) {
|
||||
urls = crawler;
|
||||
|
||||
constexpr bool running = true;
|
||||
|
||||
while (running) {
|
||||
auto [Ip, UserAgent] = t_test->wait_and_pop();
|
||||
Track track;
|
||||
|
||||
urls[UserAgent]++;
|
||||
|
||||
system("clear");
|
||||
print(urls);
|
||||
|
||||
if (Ip == "STOP") {
|
||||
running = false;
|
||||
if (!s_track->try_pop(track)) {
|
||||
this_thread::sleep_for(chrono::milliseconds(100));
|
||||
continue;
|
||||
}
|
||||
|
||||
vector<string> temp = WordUtils::split_string(track.Ip, IP);
|
||||
const int ip1 = stoi(temp[0]);
|
||||
const int ip2 = stoi(temp[1]);
|
||||
const int ip3 = stoi(temp[2]);
|
||||
const int ip4 = stoi(temp[3]);
|
||||
|
||||
const uint32_t ip = static_cast<uint32_t>(ip1 << 24) | static_cast<uint32_t>(ip2 << 16) | static_cast<uint32_t>(ip3 << 8) | static_cast<uint32_t>(ip4);
|
||||
|
||||
(*urls)[ip].links_pressed++;
|
||||
(*urls)[ip].user_agent = track.UserAgent;
|
||||
}
|
||||
}
|
||||
|
||||
void TrackerUtils::print(unordered_map<string, int> tracks) {
|
||||
for (auto it = tracks.begin(); it != tracks.end(); ++it) {
|
||||
cerr << it->first << ": " << it->second << endl;
|
||||
void [[noreturn]] TrackerUtils::print() {
|
||||
this_thread::sleep_for(chrono::milliseconds(5000));
|
||||
|
||||
while (true) {
|
||||
for (const auto &[user_agent, links_pressed]: *urls | views::values) {
|
||||
cerr << links_pressed << ": " << user_agent << endl;
|
||||
}
|
||||
|
||||
this_thread::sleep_for(chrono::milliseconds(250));
|
||||
this_thread::sleep_for(chrono::milliseconds(1000));
|
||||
system("clear"); // Testing only
|
||||
}
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ vector<string> WordUtils::predict_next_word(const string& input, const unordered
|
||||
string WordUtils::create_tag(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash) {
|
||||
constexpr unsigned char predict_num = 5;
|
||||
|
||||
const string start_words[3] = {"the", "but", "with"};
|
||||
const string start_words[] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"};
|
||||
|
||||
vector<string> tags;
|
||||
|
||||
@ -62,15 +62,17 @@ string WordUtils::create_tag(const unordered_map<string, unordered_map<string, i
|
||||
|
||||
uniform_int_distribution<unsigned short> outer_distribution(0, start_words->length() - 1);
|
||||
|
||||
const string lol = start_words[outer_distribution(generator)];
|
||||
|
||||
vector<string> temp_words = predict_next_word(start_words[outer_distribution(generator)], word_frequencies, predict_num);
|
||||
|
||||
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
|
||||
|
||||
tags.push_back(lol);
|
||||
tags.push_back(temp_words[outer_2_distribution(generator)]);
|
||||
|
||||
// Words inside the <p> tag
|
||||
for (unsigned short j = 0; j < 25; j++)
|
||||
{
|
||||
for (unsigned short j = 0; j < 25; j++) {
|
||||
temp_words = predict_next_word(tags[j], word_frequencies, predict_num);
|
||||
|
||||
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
|
||||
@ -80,8 +82,8 @@ string WordUtils::create_tag(const unordered_map<string, unordered_map<string, i
|
||||
|
||||
string temp_string = "<p>";
|
||||
|
||||
for (size_t l = 0; l < tags.size(); l++) {
|
||||
temp_string += tags[l];
|
||||
for (const auto & tag : tags) {
|
||||
temp_string += tag;
|
||||
temp_string += " ";
|
||||
}
|
||||
|
||||
@ -91,25 +93,26 @@ string WordUtils::create_tag(const unordered_map<string, unordered_map<string, i
|
||||
}
|
||||
|
||||
string WordUtils::create_link(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const unsigned long hash) {
|
||||
constexpr unsigned char predict_num = 5;
|
||||
constexpr unsigned char predict_num = 10;
|
||||
|
||||
const string start_words[10] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"};
|
||||
const string start_words[9] = {"the", "but", "with", "all", "over", "and", "as", "a", "to"};
|
||||
|
||||
vector<string> tags;
|
||||
|
||||
minstd_rand generator(hash);
|
||||
|
||||
uniform_int_distribution<unsigned short> outer_distribution(0, start_words->length() - 1);
|
||||
uniform_int_distribution<unsigned short> outer_distribution(0, start_words->size() - 1);
|
||||
|
||||
vector<string> temp_words = predict_next_word(start_words[outer_distribution(generator)], word_frequencies, predict_num);
|
||||
const string lol = start_words[outer_distribution(generator)];
|
||||
vector<string> temp_words = predict_next_word(lol, word_frequencies, predict_num);
|
||||
|
||||
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
|
||||
|
||||
tags.push_back(lol);
|
||||
tags.push_back(temp_words[outer_2_distribution(generator)]);
|
||||
|
||||
// Words inside the <p> tag
|
||||
for (unsigned short j = 0; j < 7; j++)
|
||||
{
|
||||
for (unsigned short j = 0; j < 7; j++) {
|
||||
temp_words = predict_next_word(tags[j], word_frequencies, predict_num);
|
||||
|
||||
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
|
||||
@ -120,8 +123,8 @@ string WordUtils::create_link(const unordered_map<string, unordered_map<string,
|
||||
string temp_string_2 = "<a href=\"";
|
||||
string temp_string;
|
||||
|
||||
for (size_t l = 0; l < tags.size(); l++) {
|
||||
temp_string += tags[l];
|
||||
for (const auto & tag : tags) {
|
||||
temp_string += tag;
|
||||
}
|
||||
|
||||
temp_string_2 += temp_string;
|
||||
@ -136,7 +139,6 @@ string WordUtils::create_link(const unordered_map<string, unordered_map<string,
|
||||
}
|
||||
|
||||
string WordUtils::create_image(const string& image) {
|
||||
|
||||
string temp_1 = R"(<img loading="lazy" src="/)";
|
||||
temp_1 += image;
|
||||
temp_1 += ".avif";
|
||||
@ -144,23 +146,29 @@ string WordUtils::create_image(const string& image) {
|
||||
return temp_1;
|
||||
}
|
||||
|
||||
vector<string> WordUtils::split_string(const string& input, data_type type) {
|
||||
vector<string> WordUtils::split_string(const string& input, const data_type type) {
|
||||
vector<string> data;
|
||||
|
||||
size_t start = 0;
|
||||
size_t end = 0;
|
||||
|
||||
// Create a string from the delimiters array
|
||||
string delimiter_string;
|
||||
|
||||
if (type == data_type::HTML) {
|
||||
if (type == HTML) {
|
||||
delimiter_string = " .,!?;:()\n\r\t";
|
||||
}
|
||||
|
||||
else if (type == data_type::CSS) {
|
||||
else if (type == CSS) {
|
||||
delimiter_string = "}\n\r\t";
|
||||
}
|
||||
|
||||
else if (type == TEXT) {
|
||||
delimiter_string = "\n";
|
||||
}
|
||||
|
||||
else if (type == IP) {
|
||||
delimiter_string = ".";
|
||||
}
|
||||
|
||||
while ((end = input.find_first_of(delimiter_string, start)) != string::npos) {
|
||||
if (end > start) {
|
||||
@ -209,8 +217,22 @@ string WordUtils::extract_url(const string& input) {
|
||||
return first_line.substr(method_end + 1, path_end - method_end - 1);
|
||||
}
|
||||
|
||||
// Returns the value of the User-Agent header of a raw HTTP request, or "" when
// the request has no such header.
//
// The header is located by name (case-insensitively) among the lines that precede
// the first blank line, instead of being assumed to be the third line. Surrounding
// spaces/tabs and the '\r' of a CRLF line ending are stripped from the value.
// Never throws on short or malformed input.
string WordUtils::extract_user_agent(const string& input) {
    const string header_name = "user-agent:";

    // ASCII-only lowercase; header names are ASCII.
    const auto to_lower = [](const char c) {
        return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
    };

    size_t line_start = 0;

    while (line_start < input.size()) {
        size_t line_end = input.find('\n', line_start);
        if (line_end == string::npos) line_end = input.size();

        const size_t line_length = line_end - line_start;

        // A blank line ("" or "\r") ends the header section; do not scan the body.
        if (line_length == 0 || (line_length == 1 && input[line_start] == '\r')) break;

        bool matches = line_length >= header_name.size();
        for (size_t i = 0; matches && i < header_name.size(); i++) {
            matches = to_lower(input[line_start + i]) == header_name[i];
        }

        if (matches) {
            size_t value_start = line_start + header_name.size();
            size_t value_end = line_end;

            while (value_start < value_end &&
                   (input[value_start] == ' ' || input[value_start] == '\t')) {
                value_start++;
            }

            while (value_end > value_start &&
                   (input[value_end - 1] == ' ' || input[value_end - 1] == '\t' || input[value_end - 1] == '\r')) {
                value_end--;
            }

            return input.substr(value_start, value_end - value_start);
        }

        line_start = line_end + 1;
    }

    return "";
}
|
||||
|
||||
string WordUtils::extract_image_name(const string& input) {
|
||||
const unsigned short first_line_end = input.find('f');
|
||||
|
||||
if (first_line_end == string::npos) return "";
|
||||
|
||||
string first_line = input.substr(1, first_line_end);
|
||||
@ -244,6 +266,7 @@ unsigned int WordUtils::hash_url(const string& input) {
|
||||
|
||||
unsigned long WordUtils::djb2Hash(const string& str) {
|
||||
unsigned long hash = 5381;
|
||||
|
||||
for (const char c : str) {
|
||||
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
|
||||
}
|
||||
|
10
src/main.cpp
10
src/main.cpp
@ -7,6 +7,7 @@
|
||||
#include "../include/ConcurrentQueue.h"
|
||||
#include "../include/TrackerUtils.h"
|
||||
#include "../include/Track.h"
|
||||
#include "../include/MetricsExporter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -23,9 +24,14 @@ int main(int argc, const char* argv[]) {
|
||||
return 0;
|
||||
}*/
|
||||
|
||||
auto queue = std::make_shared<ConcurrentQueue<Track>>();
|
||||
cout << "lol" << endl;
|
||||
|
||||
thread(TrackerUtils::track, queue).detach();
|
||||
auto queue = std::make_shared<ConcurrentQueue<Track>>();
|
||||
auto metrics = std::make_shared<unordered_map<uint32_t, Crawler>>();
|
||||
|
||||
thread(TrackerUtils::track, queue, metrics).detach();
|
||||
|
||||
thread(MetricsExporter::serve, metrics).detach();
|
||||
|
||||
//argv[1]
|
||||
signal(SIGPIPE, SIG_IGN);
|
||||
|
Loading…
Reference in New Issue
Block a user