Somewhat production-ready now. It's a bit slow: with 14 clients hammering on the server, CPU usage reaches 80%, with a 16MB memory footprint.
This commit is contained in:
parent
18581ed9ac
commit
14279b5c06
27
Makefile
27
Makefile
@ -153,6 +153,30 @@ src/FileUtils.cpp.s:
|
|||||||
$(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/FileUtils.cpp.s
|
$(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/FileUtils.cpp.s
|
||||||
.PHONY : src/FileUtils.cpp.s
|
.PHONY : src/FileUtils.cpp.s
|
||||||
|
|
||||||
|
src/MetricsExporter.o: src/MetricsExporter.cpp.o
|
||||||
|
.PHONY : src/MetricsExporter.o
|
||||||
|
|
||||||
|
# target to build an object file
|
||||||
|
src/MetricsExporter.cpp.o:
|
||||||
|
$(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/MetricsExporter.cpp.o
|
||||||
|
.PHONY : src/MetricsExporter.cpp.o
|
||||||
|
|
||||||
|
src/MetricsExporter.i: src/MetricsExporter.cpp.i
|
||||||
|
.PHONY : src/MetricsExporter.i
|
||||||
|
|
||||||
|
# target to preprocess a source file
|
||||||
|
src/MetricsExporter.cpp.i:
|
||||||
|
$(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/MetricsExporter.cpp.i
|
||||||
|
.PHONY : src/MetricsExporter.cpp.i
|
||||||
|
|
||||||
|
src/MetricsExporter.s: src/MetricsExporter.cpp.s
|
||||||
|
.PHONY : src/MetricsExporter.s
|
||||||
|
|
||||||
|
# target to generate assembly for a file
|
||||||
|
src/MetricsExporter.cpp.s:
|
||||||
|
$(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/MetricsExporter.cpp.s
|
||||||
|
.PHONY : src/MetricsExporter.cpp.s
|
||||||
|
|
||||||
src/ServerUtils.o: src/ServerUtils.cpp.o
|
src/ServerUtils.o: src/ServerUtils.cpp.o
|
||||||
.PHONY : src/ServerUtils.o
|
.PHONY : src/ServerUtils.o
|
||||||
|
|
||||||
@ -261,6 +285,9 @@ help:
|
|||||||
@echo "... src/FileUtils.o"
|
@echo "... src/FileUtils.o"
|
||||||
@echo "... src/FileUtils.i"
|
@echo "... src/FileUtils.i"
|
||||||
@echo "... src/FileUtils.s"
|
@echo "... src/FileUtils.s"
|
||||||
|
@echo "... src/MetricsExporter.o"
|
||||||
|
@echo "... src/MetricsExporter.i"
|
||||||
|
@echo "... src/MetricsExporter.s"
|
||||||
@echo "... src/ServerUtils.o"
|
@echo "... src/ServerUtils.o"
|
||||||
@echo "... src/ServerUtils.i"
|
@echo "... src/ServerUtils.i"
|
||||||
@echo "... src/ServerUtils.s"
|
@echo "... src/ServerUtils.s"
|
||||||
|
@ -10,9 +10,9 @@ using namespace std;
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
class ConcurrentQueue {
|
class ConcurrentQueue {
|
||||||
private:
|
private:
|
||||||
std::queue<T> queue_;
|
queue<T> queue_;
|
||||||
mutable std::mutex mutex_;
|
mutable mutex mutex_;
|
||||||
std::condition_variable condition_;
|
condition_variable condition_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// Apparently, if you have a mutex in a class, you can't copy or assign the class to any other class.
|
// Apparently, if you have a mutex in a class, you can't copy or assign the class to any other class.
|
||||||
@ -49,6 +49,11 @@ public:
|
|||||||
lock_guard<mutex> lock(mutex_);
|
lock_guard<mutex> lock(mutex_);
|
||||||
return queue_.empty();
|
return queue_.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int size() {
|
||||||
|
lock_guard<mutex> lock(mutex_);
|
||||||
|
return queue_.size();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
13
include/Crawler.h
Normal file
13
include/Crawler.h
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
#ifndef CRAWLER_H
|
||||||
|
#define CRAWLER_H
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
struct Crawler {
|
||||||
|
string user_agent;
|
||||||
|
int links_pressed;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
@ -4,7 +4,9 @@
|
|||||||
enum data_type {
|
enum data_type {
|
||||||
HTML,
|
HTML,
|
||||||
CSS,
|
CSS,
|
||||||
IMAGE
|
IMAGE,
|
||||||
|
TEXT,
|
||||||
|
IP
|
||||||
};
|
};
|
||||||
|
|
||||||
enum image_type {
|
enum image_type {
|
||||||
|
34
include/MetricsExporter.h
Normal file
34
include/MetricsExporter.h
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
//
|
||||||
|
// Created by skingging on 5/16/25.
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef METRICSEXPORTER_H
|
||||||
|
#define METRICSEXPORTER_H
|
||||||
|
#include <memory>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "../include/Crawler.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
struct MetricsExporter {
|
||||||
|
static void serve(shared_ptr<unordered_map<uint32_t, Crawler>> crawler);
|
||||||
|
static void process_request(int client_fd);
|
||||||
|
static void send_data(int client_fd, const string& data);
|
||||||
|
};
|
||||||
|
|
||||||
|
const string HTML_RESPONSE_HEADER_lol =
|
||||||
|
"HTTP/1.1 200 OK\r\n"
|
||||||
|
"Content-Type: text/html; charset=utf-8\r\n"
|
||||||
|
"Transfer-Encoding: chunked\r\n"
|
||||||
|
//"Cache-control: max-age=12000\r\n"
|
||||||
|
"Connection: close\r\n\r\n";
|
||||||
|
|
||||||
|
const string BEGINNING =
|
||||||
|
"<!DOCTYPE html>\n<html><head>\r\n"
|
||||||
|
"<title>Drip</title>\r\n"
|
||||||
|
"</head><body>\r\n";
|
||||||
|
|
||||||
|
const string END = "</body></html>\r\n";
|
||||||
|
|
||||||
|
#endif
|
@ -11,7 +11,7 @@ using namespace std;
|
|||||||
|
|
||||||
class ServerUtils {
|
class ServerUtils {
|
||||||
public:
|
public:
|
||||||
static void serve(shared_ptr<ConcurrentQueue<Track>> t_test);
|
static void serve(shared_ptr<ConcurrentQueue<Track>> cq_track);
|
||||||
private:
|
private:
|
||||||
static void process_request(int client_fd);
|
static void process_request(int client_fd);
|
||||||
static void send_header(int client_fd, data_type type);
|
static void send_header(int client_fd, data_type type);
|
||||||
@ -27,7 +27,7 @@ const string HTML_RESPONSE_HEADER =
|
|||||||
"HTTP/1.1 200 OK\r\n"
|
"HTTP/1.1 200 OK\r\n"
|
||||||
"Content-Type: text/html; charset=utf-8\r\n"
|
"Content-Type: text/html; charset=utf-8\r\n"
|
||||||
"Transfer-Encoding: chunked\r\n"
|
"Transfer-Encoding: chunked\r\n"
|
||||||
"Cache-control: max-age=12000\r\n"
|
//"Cache-control: max-age=12000\r\n"
|
||||||
"Connection: close\r\n\r\n";
|
"Connection: close\r\n\r\n";
|
||||||
|
|
||||||
const string CSS_RESPONSE_HEADER =
|
const string CSS_RESPONSE_HEADER =
|
||||||
|
@ -4,13 +4,14 @@
|
|||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include "../include/ConcurrentQueue.h"
|
#include "../include/ConcurrentQueue.h"
|
||||||
|
#include "../include/Crawler.h"
|
||||||
#include "../include/Track.h"
|
#include "../include/Track.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
struct TrackerUtils {
|
struct TrackerUtils {
|
||||||
static void track(const shared_ptr<ConcurrentQueue<Track>>& t_test);
|
static void track(const shared_ptr<ConcurrentQueue<Track>>& s_track, const shared_ptr<unordered_map<uint32_t, Crawler>>& crawler);
|
||||||
static void print(unordered_map<string, int> tracks);
|
static void print();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -15,6 +15,7 @@ struct WordUtils {
|
|||||||
static string load_file(const string& path);
|
static string load_file(const string& path);
|
||||||
static vector<string> split_string(const string& input, data_type type);
|
static vector<string> split_string(const string& input, data_type type);
|
||||||
static string extract_url(const string& input);
|
static string extract_url(const string& input);
|
||||||
|
static string extract_user_agent(const string& input);
|
||||||
static bool contains_image(const string& input);
|
static bool contains_image(const string& input);
|
||||||
static string extract_image_name(const string& input);
|
static string extract_image_name(const string& input);
|
||||||
static string create_tag(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash);
|
static string create_tag(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash);
|
||||||
|
72
src/MetricsExporter.cpp
Normal file
72
src/MetricsExporter.cpp
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
#include "../include/MetricsExporter.h"
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
#include <iostream>
|
||||||
|
#include <sstream>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <netinet/in.h>
|
||||||
|
#include <thread>
|
||||||
|
#include <chrono>
|
||||||
|
#include <cstring>
|
||||||
|
#include <random>
|
||||||
|
#include <ranges>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
|
||||||
|
shared_ptr<unordered_map<uint32_t, Crawler>> crawler_;
|
||||||
|
|
||||||
|
void [[noreturn]] MetricsExporter::serve(shared_ptr<unordered_map<uint32_t, Crawler>> crawler) {
|
||||||
|
crawler_ = move(crawler);
|
||||||
|
|
||||||
|
// server_fd is a file descriptor.
|
||||||
|
const int server_fd = socket(AF_INET, SOCK_STREAM, 0);
|
||||||
|
|
||||||
|
sockaddr_in addr{};
|
||||||
|
addr.sin_family = AF_INET;
|
||||||
|
addr.sin_port = htons(8889);
|
||||||
|
addr.sin_addr.s_addr = INADDR_ANY;
|
||||||
|
|
||||||
|
if (bind(server_fd, reinterpret_cast<sockaddr *>(&addr), sizeof(addr)) != 0) {
|
||||||
|
cout << "Please wait for the pipe to close.";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
listen(server_fd, 50);
|
||||||
|
|
||||||
|
cout << "Server is running on http://localhost:8889 \n";
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
int client_fd = accept(server_fd, nullptr, nullptr);
|
||||||
|
|
||||||
|
thread(process_request, client_fd).detach();
|
||||||
|
}
|
||||||
|
|
||||||
|
close(server_fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
void MetricsExporter::process_request(const int client_fd) {
|
||||||
|
send_data(client_fd, HTML_RESPONSE_HEADER_lol);
|
||||||
|
|
||||||
|
ostringstream oss;
|
||||||
|
|
||||||
|
oss << BEGINNING;
|
||||||
|
|
||||||
|
for (auto it = crawler_->begin(); it != crawler_->end(); ++it) {
|
||||||
|
oss << it->second.links_pressed << ": " << it->second.user_agent << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
oss << END;
|
||||||
|
|
||||||
|
send_data(client_fd, oss.str());
|
||||||
|
send(client_fd, "0\r\n\r\n", 5, 0);
|
||||||
|
close(client_fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
void MetricsExporter::send_data(const int client_fd, const string& data) {
|
||||||
|
ostringstream oss;
|
||||||
|
|
||||||
|
oss << hex << data.size() << "\r\n" << data << "\r\n";
|
||||||
|
|
||||||
|
send(client_fd, oss.str().c_str(), oss.str().size(), 0);
|
||||||
|
}
|
@ -21,10 +21,10 @@
|
|||||||
vector<string> css;
|
vector<string> css;
|
||||||
vector<string> images;
|
vector<string> images;
|
||||||
vector<unordered_map<string, unordered_map<string, int>>> all_lists;
|
vector<unordered_map<string, unordered_map<string, int>>> all_lists;
|
||||||
shared_ptr<ConcurrentQueue<Track>> t_test;
|
shared_ptr<ConcurrentQueue<Track>> tracks;
|
||||||
|
|
||||||
void [[noreturn]] ServerUtils::serve(shared_ptr<ConcurrentQueue<Track>> test) {
|
void [[noreturn]] ServerUtils::serve(shared_ptr<ConcurrentQueue<Track>> cq_track) {
|
||||||
t_test = std::move(test);
|
tracks = std::move(cq_track);
|
||||||
css = WordUtils::load_css("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/style.css");
|
css = WordUtils::load_css("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/style.css");
|
||||||
images = FileUtils::get_image_list("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/");
|
images = FileUtils::get_image_list("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/");
|
||||||
const vector<string> words = FileUtils::get_wordlists("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/wordlist/");
|
const vector<string> words = FileUtils::get_wordlists("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/wordlist/");
|
||||||
@ -52,7 +52,9 @@ void [[noreturn]] ServerUtils::serve(shared_ptr<ConcurrentQueue<Track>> test) {
|
|||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
int client_fd = accept(server_fd, nullptr, nullptr);
|
int client_fd = accept(server_fd, nullptr, nullptr);
|
||||||
|
|
||||||
|
if (client_fd == -1) continue;
|
||||||
|
//cerr << client_fd << endl;
|
||||||
thread(process_request, client_fd).detach();
|
thread(process_request, client_fd).detach();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -60,24 +62,35 @@ void [[noreturn]] ServerUtils::serve(shared_ptr<ConcurrentQueue<Track>> test) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ServerUtils::process_request(const int client_fd) {
|
void ServerUtils::process_request(const int client_fd) {
|
||||||
|
string ip = get_ip(client_fd);
|
||||||
|
|
||||||
char buffer[1024];
|
char buffer[1024];
|
||||||
const unsigned long bytes_received = recv(client_fd, buffer, sizeof(buffer) - 1, 0);
|
const unsigned long bytes_received = recv(client_fd, buffer, sizeof(buffer) - 1, 0);
|
||||||
|
|
||||||
string url;
|
string url;
|
||||||
|
string user_agent;
|
||||||
|
|
||||||
if (bytes_received > 0) {
|
if (bytes_received > 0) {
|
||||||
buffer[bytes_received] = '\0';
|
buffer[bytes_received] = '\0';
|
||||||
|
|
||||||
url = WordUtils::extract_url(string(buffer));
|
url = WordUtils::extract_url(string(buffer));
|
||||||
|
user_agent = WordUtils::extract_user_agent(string(buffer));
|
||||||
|
|
||||||
if (url.empty()) {
|
if (url.empty()) {
|
||||||
close(client_fd);
|
close(client_fd);
|
||||||
|
cerr << "AAAA \n";
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
close(client_fd);
|
close(client_fd);
|
||||||
cerr << "AAAA \n";
|
cerr << "AAAA \n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Track track;
|
||||||
|
track.Ip = ip;
|
||||||
|
track.UserAgent = user_agent;
|
||||||
|
|
||||||
|
tracks->push(track);
|
||||||
|
|
||||||
if (url == "/style.css") {
|
if (url == "/style.css") {
|
||||||
// This sends the header, that instructs how the browser should interpret the data.
|
// This sends the header, that instructs how the browser should interpret the data.
|
||||||
send_header(client_fd, data_type::CSS);
|
send_header(client_fd, data_type::CSS);
|
||||||
@ -95,13 +108,6 @@ void ServerUtils::process_request(const int client_fd) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
else {
|
else {
|
||||||
Track track;
|
|
||||||
track.Ip = get_ip(client_fd);
|
|
||||||
|
|
||||||
// Testing only.
|
|
||||||
track.UserAgent = url;
|
|
||||||
t_test->push(track);
|
|
||||||
|
|
||||||
const unsigned long hash3 = WordUtils::fnv1aHash(url);
|
const unsigned long hash3 = WordUtils::fnv1aHash(url);
|
||||||
|
|
||||||
send_header(client_fd, HTML);
|
send_header(client_fd, HTML);
|
||||||
@ -137,11 +143,13 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) {
|
|||||||
minstd_rand generator(hash);
|
minstd_rand generator(hash);
|
||||||
uniform_int_distribution<unsigned short> distribution_1(0, end - 1);
|
uniform_int_distribution<unsigned short> distribution_1(0, end - 1);
|
||||||
uniform_int_distribution<unsigned short> distribution_2(0, images.size() - 1);
|
uniform_int_distribution<unsigned short> distribution_2(0, images.size() - 1);
|
||||||
uniform_int_distribution<unsigned short> distribution_3(0, 8);
|
uniform_int_distribution<unsigned short> distribution_3(0, 8); // There is only 9 wordlists.
|
||||||
|
uniform_int_distribution<unsigned short> distribution_4(0, end - 2);
|
||||||
|
|
||||||
const int link = distribution_1(generator);
|
const int link = distribution_1(generator);
|
||||||
const int image = distribution_2(generator);
|
const int image = distribution_2(generator);
|
||||||
const int l = distribution_3(generator);
|
const int l = distribution_3(generator);
|
||||||
|
const int img = distribution_4(generator);
|
||||||
|
|
||||||
while (itr < end) {
|
while (itr < end) {
|
||||||
send_data(client_fd, WordUtils::create_tag(all_lists[l], hashes[itr]));
|
send_data(client_fd, WordUtils::create_tag(all_lists[l], hashes[itr]));
|
||||||
@ -150,11 +158,11 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) {
|
|||||||
send_data(client_fd, WordUtils::create_link(all_lists[l], hash));
|
send_data(client_fd, WordUtils::create_link(all_lists[l], hash));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (itr == link) {
|
if (itr == img) {
|
||||||
send_data(client_fd, WordUtils::create_image(images[image]));
|
send_data(client_fd, WordUtils::create_image(images[image]));
|
||||||
}
|
}
|
||||||
|
|
||||||
this_thread::sleep_for(chrono::milliseconds(25));
|
//this_thread::sleep_for(chrono::milliseconds(75));
|
||||||
|
|
||||||
itr++;
|
itr++;
|
||||||
}
|
}
|
||||||
@ -165,8 +173,8 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ServerUtils::send_chunked_css(const int client_fd) {
|
void ServerUtils::send_chunked_css(const int client_fd) {
|
||||||
for (size_t i = 0; i < css.size(); i++) {
|
for (const auto & cs : css) {
|
||||||
send_data(client_fd, css[i]);
|
send_data(client_fd, cs);
|
||||||
|
|
||||||
this_thread::sleep_for(chrono::milliseconds(25));
|
this_thread::sleep_for(chrono::milliseconds(25));
|
||||||
}
|
}
|
||||||
@ -219,7 +227,7 @@ void ServerUtils::send_data(const int client_fd, const string& data) {
|
|||||||
string ServerUtils::get_ip(const int client_fd) {
|
string ServerUtils::get_ip(const int client_fd) {
|
||||||
sockaddr_storage addr{};
|
sockaddr_storage addr{};
|
||||||
socklen_t len = sizeof(addr);
|
socklen_t len = sizeof(addr);
|
||||||
char ipstr[INET6_ADDRSTRLEN];
|
char buf[INET6_ADDRSTRLEN];
|
||||||
|
|
||||||
if (getpeername(client_fd, reinterpret_cast<sockaddr *>(&addr), &len) == -1) {
|
if (getpeername(client_fd, reinterpret_cast<sockaddr *>(&addr), &len) == -1) {
|
||||||
perror("getpeername");
|
perror("getpeername");
|
||||||
@ -228,15 +236,11 @@ string ServerUtils::get_ip(const int client_fd) {
|
|||||||
|
|
||||||
if (addr.ss_family == AF_INET) {
|
if (addr.ss_family == AF_INET) {
|
||||||
// IPv4
|
// IPv4
|
||||||
sockaddr_in* s = reinterpret_cast<sockaddr_in *>(&addr);
|
const sockaddr_in* s = reinterpret_cast<sockaddr_in *>(&addr);
|
||||||
inet_ntop(AF_INET, &s->sin_addr, ipstr, sizeof(ipstr));
|
inet_ntop(AF_INET, &s->sin_addr, buf, sizeof(buf));
|
||||||
} else if (addr.ss_family == AF_INET6) {
|
|
||||||
// IPv6
|
|
||||||
sockaddr_in6* s = reinterpret_cast<sockaddr_in6 *>(&addr);
|
|
||||||
inet_ntop(AF_INET6, &s->sin6_addr, ipstr, sizeof(ipstr));
|
|
||||||
} else {
|
} else {
|
||||||
return "Unknown address family";
|
return "Unknown address family";
|
||||||
}
|
}
|
||||||
|
|
||||||
return string(ipstr);
|
return buf;
|
||||||
}
|
}
|
@ -1,34 +1,53 @@
|
|||||||
#include "../include/TrackerUtils.h"
|
|
||||||
#include "../include/ConcurrentQueue.h"
|
#include "../include/ConcurrentQueue.h"
|
||||||
|
#include "../include/TrackerUtils.h"
|
||||||
|
#include "../include/WordUtils.h"
|
||||||
|
#include "../include/Crawler.h"
|
||||||
#include "../include/Track.h"
|
#include "../include/Track.h"
|
||||||
#include <iostream>
|
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <iostream>
|
||||||
|
#include <utility>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <utility>
|
#include <ranges>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
void TrackerUtils::track(const shared_ptr<ConcurrentQueue<Track>> &t_test) {
|
shared_ptr<unordered_map<uint32_t, Crawler>> urls;
|
||||||
bool running = true;
|
|
||||||
unordered_map<string, int> urls;
|
void [[noreturn]] TrackerUtils::track(const shared_ptr<ConcurrentQueue<Track>>& s_track, const shared_ptr<unordered_map<uint32_t, Crawler>>& crawler) {
|
||||||
|
urls = crawler;
|
||||||
|
|
||||||
|
constexpr bool running = true;
|
||||||
|
|
||||||
while (running) {
|
while (running) {
|
||||||
auto [Ip, UserAgent] = t_test->wait_and_pop();
|
Track track;
|
||||||
|
|
||||||
urls[UserAgent]++;
|
if (!s_track->try_pop(track)) {
|
||||||
|
this_thread::sleep_for(chrono::milliseconds(100));
|
||||||
system("clear");
|
continue;
|
||||||
print(urls);
|
|
||||||
|
|
||||||
if (Ip == "STOP") {
|
|
||||||
running = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vector<string> temp = WordUtils::split_string(track.Ip, IP);
|
||||||
|
const int ip1 = stoi(temp[0]);
|
||||||
|
const int ip2 = stoi(temp[1]);
|
||||||
|
const int ip3 = stoi(temp[2]);
|
||||||
|
const int ip4 = stoi(temp[3]);
|
||||||
|
|
||||||
|
const uint32_t ip = static_cast<uint32_t>(ip1 << 24) | static_cast<uint32_t>(ip2 << 16) | static_cast<uint32_t>(ip3 << 8) | static_cast<uint32_t>(ip4);
|
||||||
|
|
||||||
|
(*urls)[ip].links_pressed++;
|
||||||
|
(*urls)[ip].user_agent = track.UserAgent;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void TrackerUtils::print(unordered_map<string, int> tracks) {
|
void [[noreturn]] TrackerUtils::print() {
|
||||||
for (auto it = tracks.begin(); it != tracks.end(); ++it) {
|
this_thread::sleep_for(chrono::milliseconds(5000));
|
||||||
cerr << it->first << ": " << it->second << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
this_thread::sleep_for(chrono::milliseconds(250));
|
while (true) {
|
||||||
|
for (const auto &[user_agent, links_pressed]: *urls | views::values) {
|
||||||
|
cerr << links_pressed << ": " << user_agent << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
this_thread::sleep_for(chrono::milliseconds(1000));
|
||||||
|
system("clear"); // Testing only
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -54,7 +54,7 @@ vector<string> WordUtils::predict_next_word(const string& input, const unordered
|
|||||||
string WordUtils::create_tag(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash) {
|
string WordUtils::create_tag(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const char& hash) {
|
||||||
constexpr unsigned char predict_num = 5;
|
constexpr unsigned char predict_num = 5;
|
||||||
|
|
||||||
const string start_words[3] = {"the", "but", "with"};
|
const string start_words[] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"};
|
||||||
|
|
||||||
vector<string> tags;
|
vector<string> tags;
|
||||||
|
|
||||||
@ -62,15 +62,17 @@ string WordUtils::create_tag(const unordered_map<string, unordered_map<string, i
|
|||||||
|
|
||||||
uniform_int_distribution<unsigned short> outer_distribution(0, start_words->length() - 1);
|
uniform_int_distribution<unsigned short> outer_distribution(0, start_words->length() - 1);
|
||||||
|
|
||||||
|
const string lol = start_words[outer_distribution(generator)];
|
||||||
|
|
||||||
vector<string> temp_words = predict_next_word(start_words[outer_distribution(generator)], word_frequencies, predict_num);
|
vector<string> temp_words = predict_next_word(start_words[outer_distribution(generator)], word_frequencies, predict_num);
|
||||||
|
|
||||||
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
|
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
|
||||||
|
|
||||||
|
tags.push_back(lol);
|
||||||
tags.push_back(temp_words[outer_2_distribution(generator)]);
|
tags.push_back(temp_words[outer_2_distribution(generator)]);
|
||||||
|
|
||||||
// Words inside the <p> tag
|
// Words inside the <p> tag
|
||||||
for (unsigned short j = 0; j < 25; j++)
|
for (unsigned short j = 0; j < 25; j++) {
|
||||||
{
|
|
||||||
temp_words = predict_next_word(tags[j], word_frequencies, predict_num);
|
temp_words = predict_next_word(tags[j], word_frequencies, predict_num);
|
||||||
|
|
||||||
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
|
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
|
||||||
@ -80,8 +82,8 @@ string WordUtils::create_tag(const unordered_map<string, unordered_map<string, i
|
|||||||
|
|
||||||
string temp_string = "<p>";
|
string temp_string = "<p>";
|
||||||
|
|
||||||
for (size_t l = 0; l < tags.size(); l++) {
|
for (const auto & tag : tags) {
|
||||||
temp_string += tags[l];
|
temp_string += tag;
|
||||||
temp_string += " ";
|
temp_string += " ";
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -91,25 +93,26 @@ string WordUtils::create_tag(const unordered_map<string, unordered_map<string, i
|
|||||||
}
|
}
|
||||||
|
|
||||||
string WordUtils::create_link(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const unsigned long hash) {
|
string WordUtils::create_link(const unordered_map<string, unordered_map<string, int>>& word_frequencies, const unsigned long hash) {
|
||||||
constexpr unsigned char predict_num = 5;
|
constexpr unsigned char predict_num = 10;
|
||||||
|
|
||||||
const string start_words[10] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"};
|
const string start_words[9] = {"the", "but", "with", "all", "over", "and", "as", "a", "to"};
|
||||||
|
|
||||||
vector<string> tags;
|
vector<string> tags;
|
||||||
|
|
||||||
minstd_rand generator(hash);
|
minstd_rand generator(hash);
|
||||||
|
|
||||||
uniform_int_distribution<unsigned short> outer_distribution(0, start_words->length() - 1);
|
uniform_int_distribution<unsigned short> outer_distribution(0, start_words->size() - 1);
|
||||||
|
|
||||||
vector<string> temp_words = predict_next_word(start_words[outer_distribution(generator)], word_frequencies, predict_num);
|
const string lol = start_words[outer_distribution(generator)];
|
||||||
|
vector<string> temp_words = predict_next_word(lol, word_frequencies, predict_num);
|
||||||
|
|
||||||
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
|
uniform_int_distribution<unsigned short> outer_2_distribution(0, temp_words.size() - 1);
|
||||||
|
|
||||||
|
tags.push_back(lol);
|
||||||
tags.push_back(temp_words[outer_2_distribution(generator)]);
|
tags.push_back(temp_words[outer_2_distribution(generator)]);
|
||||||
|
|
||||||
// Words inside the <p> tag
|
// Words inside the <p> tag
|
||||||
for (unsigned short j = 0; j < 7; j++)
|
for (unsigned short j = 0; j < 7; j++) {
|
||||||
{
|
|
||||||
temp_words = predict_next_word(tags[j], word_frequencies, predict_num);
|
temp_words = predict_next_word(tags[j], word_frequencies, predict_num);
|
||||||
|
|
||||||
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
|
uniform_int_distribution<unsigned short> inner_distribution(0, temp_words.size() - 1);
|
||||||
@ -120,8 +123,8 @@ string WordUtils::create_link(const unordered_map<string, unordered_map<string,
|
|||||||
string temp_string_2 = "<a href=\"";
|
string temp_string_2 = "<a href=\"";
|
||||||
string temp_string;
|
string temp_string;
|
||||||
|
|
||||||
for (size_t l = 0; l < tags.size(); l++) {
|
for (const auto & tag : tags) {
|
||||||
temp_string += tags[l];
|
temp_string += tag;
|
||||||
}
|
}
|
||||||
|
|
||||||
temp_string_2 += temp_string;
|
temp_string_2 += temp_string;
|
||||||
@ -136,7 +139,6 @@ string WordUtils::create_link(const unordered_map<string, unordered_map<string,
|
|||||||
}
|
}
|
||||||
|
|
||||||
string WordUtils::create_image(const string& image) {
|
string WordUtils::create_image(const string& image) {
|
||||||
|
|
||||||
string temp_1 = R"(<img loading="lazy" src="/)";
|
string temp_1 = R"(<img loading="lazy" src="/)";
|
||||||
temp_1 += image;
|
temp_1 += image;
|
||||||
temp_1 += ".avif";
|
temp_1 += ".avif";
|
||||||
@ -144,23 +146,29 @@ string WordUtils::create_image(const string& image) {
|
|||||||
return temp_1;
|
return temp_1;
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<string> WordUtils::split_string(const string& input, data_type type) {
|
vector<string> WordUtils::split_string(const string& input, const data_type type) {
|
||||||
vector<string> data;
|
vector<string> data;
|
||||||
|
|
||||||
size_t start = 0;
|
size_t start = 0;
|
||||||
size_t end = 0;
|
size_t end = 0;
|
||||||
|
|
||||||
// Create a string from the delimiters array
|
|
||||||
string delimiter_string;
|
string delimiter_string;
|
||||||
|
|
||||||
if (type == data_type::HTML) {
|
if (type == HTML) {
|
||||||
delimiter_string = " .,!?;:()\n\r\t";
|
delimiter_string = " .,!?;:()\n\r\t";
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (type == data_type::CSS) {
|
else if (type == CSS) {
|
||||||
delimiter_string = "}\n\r\t";
|
delimiter_string = "}\n\r\t";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
else if (type == TEXT) {
|
||||||
|
delimiter_string = "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (type == IP) {
|
||||||
|
delimiter_string = ".";
|
||||||
|
}
|
||||||
|
|
||||||
while ((end = input.find_first_of(delimiter_string, start)) != string::npos) {
|
while ((end = input.find_first_of(delimiter_string, start)) != string::npos) {
|
||||||
if (end > start) {
|
if (end > start) {
|
||||||
@ -209,8 +217,22 @@ string WordUtils::extract_url(const string& input) {
|
|||||||
return first_line.substr(method_end + 1, path_end - method_end - 1);
|
return first_line.substr(method_end + 1, path_end - method_end - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
string WordUtils::extract_user_agent(const string& input) {
|
||||||
|
if (input.empty()) return "";
|
||||||
|
|
||||||
|
vector<string> lines = split_string(input, TEXT);
|
||||||
|
|
||||||
|
const unsigned short first_line_end = lines[2].find('\n');
|
||||||
|
if (first_line_end == string::npos) return "";
|
||||||
|
|
||||||
|
string first_line = lines[2].substr(12, first_line_end);
|
||||||
|
|
||||||
|
return first_line;
|
||||||
|
}
|
||||||
|
|
||||||
string WordUtils::extract_image_name(const string& input) {
|
string WordUtils::extract_image_name(const string& input) {
|
||||||
const unsigned short first_line_end = input.find('f');
|
const unsigned short first_line_end = input.find('f');
|
||||||
|
|
||||||
if (first_line_end == string::npos) return "";
|
if (first_line_end == string::npos) return "";
|
||||||
|
|
||||||
string first_line = input.substr(1, first_line_end);
|
string first_line = input.substr(1, first_line_end);
|
||||||
@ -244,6 +266,7 @@ unsigned int WordUtils::hash_url(const string& input) {
|
|||||||
|
|
||||||
unsigned long WordUtils::djb2Hash(const string& str) {
|
unsigned long WordUtils::djb2Hash(const string& str) {
|
||||||
unsigned long hash = 5381;
|
unsigned long hash = 5381;
|
||||||
|
|
||||||
for (const char c : str) {
|
for (const char c : str) {
|
||||||
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
|
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
|
||||||
}
|
}
|
||||||
|
10
src/main.cpp
10
src/main.cpp
@ -7,6 +7,7 @@
|
|||||||
#include "../include/ConcurrentQueue.h"
|
#include "../include/ConcurrentQueue.h"
|
||||||
#include "../include/TrackerUtils.h"
|
#include "../include/TrackerUtils.h"
|
||||||
#include "../include/Track.h"
|
#include "../include/Track.h"
|
||||||
|
#include "../include/MetricsExporter.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@ -23,9 +24,14 @@ int main(int argc, const char* argv[]) {
|
|||||||
return 0;
|
return 0;
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
auto queue = std::make_shared<ConcurrentQueue<Track>>();
|
cout << "lol" << endl;
|
||||||
|
|
||||||
thread(TrackerUtils::track, queue).detach();
|
auto queue = std::make_shared<ConcurrentQueue<Track>>();
|
||||||
|
auto metrics = std::make_shared<unordered_map<uint32_t, Crawler>>();
|
||||||
|
|
||||||
|
thread(TrackerUtils::track, queue, metrics).detach();
|
||||||
|
|
||||||
|
thread(MetricsExporter::serve, metrics).detach();
|
||||||
|
|
||||||
//argv[1]
|
//argv[1]
|
||||||
signal(SIGPIPE, SIG_IGN);
|
signal(SIGPIPE, SIG_IGN);
|
||||||
|
Loading…
Reference in New Issue
Block a user