diff --git a/Makefile b/Makefile index e2f88a9..fea2aef 100644 --- a/Makefile +++ b/Makefile @@ -153,6 +153,30 @@ src/FileUtils.cpp.s: $(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/FileUtils.cpp.s .PHONY : src/FileUtils.cpp.s +src/MetricsExporter.o: src/MetricsExporter.cpp.o +.PHONY : src/MetricsExporter.o + +# target to build an object file +src/MetricsExporter.cpp.o: + $(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/MetricsExporter.cpp.o +.PHONY : src/MetricsExporter.cpp.o + +src/MetricsExporter.i: src/MetricsExporter.cpp.i +.PHONY : src/MetricsExporter.i + +# target to preprocess a source file +src/MetricsExporter.cpp.i: + $(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/MetricsExporter.cpp.i +.PHONY : src/MetricsExporter.cpp.i + +src/MetricsExporter.s: src/MetricsExporter.cpp.s +.PHONY : src/MetricsExporter.s + +# target to generate assembly for a file +src/MetricsExporter.cpp.s: + $(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/MetricsExporter.cpp.s +.PHONY : src/MetricsExporter.cpp.s + src/ServerUtils.o: src/ServerUtils.cpp.o .PHONY : src/ServerUtils.o @@ -261,6 +285,9 @@ help: @echo "... src/FileUtils.o" @echo "... src/FileUtils.i" @echo "... src/FileUtils.s" + @echo "... src/MetricsExporter.o" + @echo "... src/MetricsExporter.i" + @echo "... src/MetricsExporter.s" @echo "... src/ServerUtils.o" @echo "... src/ServerUtils.i" @echo "... src/ServerUtils.s" diff --git a/include/ConcurrentQueue.h b/include/ConcurrentQueue.h index c4b41eb..ed85827 100644 --- a/include/ConcurrentQueue.h +++ b/include/ConcurrentQueue.h @@ -10,9 +10,9 @@ using namespace std; template class ConcurrentQueue { private: - std::queue queue_; - mutable std::mutex mutex_; - std::condition_variable condition_; + queue queue_; + mutable mutex mutex_; + condition_variable condition_; public: // Apparently, if you have a mutex in a class, you can't copy or assign the class to any other class. @@ -49,6 +49,11 @@ public: lock_guard lock(mutex_); return queue_.empty(); } + + int size() { + lock_guard lock(mutex_); + return queue_.size(); + } }; #endif diff --git a/include/Crawler.h b/include/Crawler.h new file mode 100644 index 0000000..3fbdc23 --- /dev/null +++ b/include/Crawler.h @@ -0,0 +1,13 @@ +#ifndef CRAWLER_H +#define CRAWLER_H + +#include + +using namespace std; + +struct Crawler { + string user_agent; + int links_pressed; +}; + +#endif diff --git a/include/DataType.h b/include/DataType.h index 03391bc..9cc10c3 100644 --- a/include/DataType.h +++ b/include/DataType.h @@ -4,7 +4,9 @@ enum data_type { HTML, CSS, - IMAGE + IMAGE, + TEXT, + IP }; enum image_type { diff --git a/include/MetricsExporter.h b/include/MetricsExporter.h new file mode 100644 index 0000000..f5fc86d --- /dev/null +++ b/include/MetricsExporter.h @@ -0,0 +1,34 @@ +// +// Created by skingging on 5/16/25. +// + +#ifndef METRICSEXPORTER_H +#define METRICSEXPORTER_H +#include +#include + +#include "../include/Crawler.h" + +using namespace std; + +struct MetricsExporter { + static void serve(shared_ptr> crawler); + static void process_request(int client_fd); + static void send_data(int client_fd, const string& data); +}; + +const string HTML_RESPONSE_HEADER_lol = + "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html; charset=utf-8\r\n" + "Transfer-Encoding: chunked\r\n" + //"Cache-control: max-age=12000\r\n" + "Connection: close\r\n\r\n"; + +const string BEGINNING = + "\n\r\n" + "Drip\r\n" + "\r\n"; + +const string END = "\r\n"; + +#endif diff --git a/include/ServerUtils.h b/include/ServerUtils.h index 98b8c0e..582dbc0 100644 --- a/include/ServerUtils.h +++ b/include/ServerUtils.h @@ -11,7 +11,7 @@ using namespace std; class ServerUtils { public: - static void serve(shared_ptr> t_test); + static void serve(shared_ptr> cq_track); private: static void process_request(int client_fd); static void send_header(int client_fd, data_type type); @@ -27,7 +27,7 @@ const string HTML_RESPONSE_HEADER = "HTTP/1.1 200 OK\r\n" "Content-Type: text/html; charset=utf-8\r\n" "Transfer-Encoding: chunked\r\n" - "Cache-control: max-age=12000\r\n" + //"Cache-control: max-age=12000\r\n" "Connection: close\r\n\r\n"; const string CSS_RESPONSE_HEADER = diff --git a/include/TrackerUtils.h b/include/TrackerUtils.h index ed06326..5e1c0bc 100644 --- a/include/TrackerUtils.h +++ b/include/TrackerUtils.h @@ -4,13 +4,14 @@ #include #include "../include/ConcurrentQueue.h" +#include "../include/Crawler.h" #include "../include/Track.h" using namespace std; struct TrackerUtils { - static void track(const shared_ptr>& t_test); - static void print(unordered_map tracks); + static void track(const shared_ptr>& s_track, const shared_ptr>& crawler); + static void print(); }; #endif diff --git a/include/WordUtils.h b/include/WordUtils.h index 0062b76..a4ffd60 100644 --- a/include/WordUtils.h +++ b/include/WordUtils.h @@ -15,6 +15,7 @@ struct WordUtils { static string load_file(const string& path); static vector split_string(const string& input, data_type type); static string extract_url(const string& input); + static string extract_user_agent(const string& input); static bool contains_image(const string& input); static string extract_image_name(const string& input); static string create_tag(const unordered_map>& word_frequencies, const char& hash); diff --git a/src/MetricsExporter.cpp b/src/MetricsExporter.cpp new file mode 100644 index 0000000..52d8699 --- /dev/null +++ b/src/MetricsExporter.cpp @@ -0,0 +1,72 @@ +#include "../include/MetricsExporter.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +shared_ptr> crawler_; + +void [[noreturn]] MetricsExporter::serve(shared_ptr> crawler) { + crawler_ = move(crawler); + + // server_fd is a file descriptor. + const int server_fd = socket(AF_INET, SOCK_STREAM, 0); + + sockaddr_in addr{}; + addr.sin_family = AF_INET; + addr.sin_port = htons(8889); + addr.sin_addr.s_addr = INADDR_ANY; + + if (bind(server_fd, reinterpret_cast(&addr), sizeof(addr)) != 0) { + cout << "Please wait for the pipe to close."; + return; + } + + listen(server_fd, 50); + + cout << "Server is running on http://localhost:8889 \n"; + + while (true) { + int client_fd = accept(server_fd, nullptr, nullptr); + + thread(process_request, client_fd).detach(); + } + + close(server_fd); +} + +void MetricsExporter::process_request(const int client_fd) { + send_data(client_fd, HTML_RESPONSE_HEADER_lol); + + ostringstream oss; + + oss << BEGINNING; + + for (auto it = crawler_->begin(); it != crawler_->end(); ++it) { + oss << it->second.links_pressed << ": " << it->second.user_agent << endl; + } + + oss << END; + + send_data(client_fd, oss.str()); + send(client_fd, "0\r\n\r\n", 5, 0); + close(client_fd); +} + +void MetricsExporter::send_data(const int client_fd, const string& data) { + ostringstream oss; + + oss << hex << data.size() << "\r\n" << data << "\r\n"; + + send(client_fd, oss.str().c_str(), oss.str().size(), 0); +} \ No newline at end of file diff --git a/src/ServerUtils.cpp b/src/ServerUtils.cpp index 80a6fab..b40d289 100644 --- a/src/ServerUtils.cpp +++ b/src/ServerUtils.cpp @@ -21,10 +21,10 @@ vector css; vector images; vector>> all_lists; -shared_ptr> t_test; +shared_ptr> tracks; -void [[noreturn]] ServerUtils::serve(shared_ptr> test) { - t_test = std::move(test); +void [[noreturn]] ServerUtils::serve(shared_ptr> cq_track) { + tracks = std::move(cq_track); css = WordUtils::load_css("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/style.css"); images = FileUtils::get_image_list("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/"); const vector words = FileUtils::get_wordlists("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/wordlist/"); @@ -52,7 +52,9 @@ void [[noreturn]] ServerUtils::serve(shared_ptr> test) { while (true) { int client_fd = accept(server_fd, nullptr, nullptr); - + + if (client_fd == -1) continue; + //cerr << client_fd << endl; thread(process_request, client_fd).detach(); } @@ -60,24 +62,35 @@ void [[noreturn]] ServerUtils::serve(shared_ptr> test) { } void ServerUtils::process_request(const int client_fd) { + string ip = get_ip(client_fd); + char buffer[1024]; const unsigned long bytes_received = recv(client_fd, buffer, sizeof(buffer) - 1, 0); string url; + string user_agent; if (bytes_received > 0) { buffer[bytes_received] = '\0'; url = WordUtils::extract_url(string(buffer)); + user_agent = WordUtils::extract_user_agent(string(buffer)); if (url.empty()) { close(client_fd); + cerr << "AAAA \n"; } } else { close(client_fd); cerr << "AAAA \n"; } + Track track; + track.Ip = ip; + track.UserAgent = user_agent; + + tracks->push(track); + if (url == "/style.css") { // This sends the header, that instructs how the browser should interpret the data. send_header(client_fd, data_type::CSS); @@ -95,13 +108,6 @@ void ServerUtils::process_request(const int client_fd) { } else { - Track track; - track.Ip = get_ip(client_fd); - - // Testing only. - track.UserAgent = url; - t_test->push(track); - const unsigned long hash3 = WordUtils::fnv1aHash(url); send_header(client_fd, HTML); @@ -137,11 +143,13 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) { minstd_rand generator(hash); uniform_int_distribution distribution_1(0, end - 1); uniform_int_distribution distribution_2(0, images.size() - 1); - uniform_int_distribution distribution_3(0, 8); + uniform_int_distribution distribution_3(0, 8); // There is only 9 wordlists. + uniform_int_distribution distribution_4(0, end - 2); const int link = distribution_1(generator); const int image = distribution_2(generator); const int l = distribution_3(generator); + const int img = distribution_4(generator); while (itr < end) { send_data(client_fd, WordUtils::create_tag(all_lists[l], hashes[itr])); @@ -150,11 +158,11 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) { send_data(client_fd, WordUtils::create_link(all_lists[l], hash)); } - if (itr == link) { + if (itr == img) { send_data(client_fd, WordUtils::create_image(images[image])); } - this_thread::sleep_for(chrono::milliseconds(25)); + //this_thread::sleep_for(chrono::milliseconds(75)); itr++; } @@ -165,8 +173,8 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) { } void ServerUtils::send_chunked_css(const int client_fd) { - for (size_t i = 0; i < css.size(); i++) { - send_data(client_fd, css[i]); + for (const auto & cs : css) { + send_data(client_fd, cs); this_thread::sleep_for(chrono::milliseconds(25)); } @@ -219,7 +227,7 @@ void ServerUtils::send_data(const int client_fd, const string& data) { string ServerUtils::get_ip(const int client_fd) { sockaddr_storage addr{}; socklen_t len = sizeof(addr); - char ipstr[INET6_ADDRSTRLEN]; + char buf[INET6_ADDRSTRLEN]; if (getpeername(client_fd, reinterpret_cast(&addr), &len) == -1) { perror("getpeername"); @@ -228,15 +236,11 @@ string ServerUtils::get_ip(const int client_fd) { if (addr.ss_family == AF_INET) { // IPv4 - sockaddr_in* s = reinterpret_cast(&addr); - inet_ntop(AF_INET, &s->sin_addr, ipstr, sizeof(ipstr)); - } else if (addr.ss_family == AF_INET6) { - // IPv6 - sockaddr_in6* s = reinterpret_cast(&addr); - inet_ntop(AF_INET6, &s->sin6_addr, ipstr, sizeof(ipstr)); + const sockaddr_in* s = reinterpret_cast(&addr); + inet_ntop(AF_INET, &s->sin_addr, buf, sizeof(buf)); } else { return "Unknown address family"; } - return string(ipstr); + return buf; } \ No newline at end of file diff --git a/src/TrackerUtils.cpp b/src/TrackerUtils.cpp index bec2531..0177bb8 100644 --- a/src/TrackerUtils.cpp +++ b/src/TrackerUtils.cpp @@ -1,34 +1,53 @@ -#include "../include/TrackerUtils.h" #include "../include/ConcurrentQueue.h" +#include "../include/TrackerUtils.h" +#include "../include/WordUtils.h" +#include "../include/Crawler.h" #include "../include/Track.h" -#include #include +#include +#include #include #include -#include +#include +#include -void TrackerUtils::track(const shared_ptr> &t_test) { - bool running = true; - unordered_map urls; +shared_ptr> urls; + +void [[noreturn]] TrackerUtils::track(const shared_ptr>& s_track, const shared_ptr>& crawler) { + urls = crawler; + + constexpr bool running = true; while (running) { - auto [Ip, UserAgent] = t_test->wait_and_pop(); + Track track; - urls[UserAgent]++; - - system("clear"); - print(urls); - - if (Ip == "STOP") { - running = false; + if (!s_track->try_pop(track)) { + this_thread::sleep_for(chrono::milliseconds(100)); + continue; } + + vector temp = WordUtils::split_string(track.Ip, IP); + const int ip1 = stoi(temp[0]); + const int ip2 = stoi(temp[1]); + const int ip3 = stoi(temp[2]); + const int ip4 = stoi(temp[3]); + + const uint32_t ip = static_cast(ip1 << 24) | static_cast(ip2 << 16) | static_cast(ip3 << 8) | static_cast(ip4); + + (*urls)[ip].links_pressed++; + (*urls)[ip].user_agent = track.UserAgent; } } -void TrackerUtils::print(unordered_map tracks) { - for (auto it = tracks.begin(); it != tracks.end(); ++it) { - cerr << it->first << ": " << it->second << endl; - } +void [[noreturn]] TrackerUtils::print() { + this_thread::sleep_for(chrono::milliseconds(5000)); - this_thread::sleep_for(chrono::milliseconds(250)); + while (true) { + for (const auto &[user_agent, links_pressed]: *urls | views::values) { + cerr << links_pressed << ": " << user_agent << endl; + } + + this_thread::sleep_for(chrono::milliseconds(1000)); + system("clear"); // Testing only + } } diff --git a/src/WordUtils.cpp b/src/WordUtils.cpp index 136ab4e..703e248 100644 --- a/src/WordUtils.cpp +++ b/src/WordUtils.cpp @@ -54,7 +54,7 @@ vector WordUtils::predict_next_word(const string& input, const unordered string WordUtils::create_tag(const unordered_map>& word_frequencies, const char& hash) { constexpr unsigned char predict_num = 5; - const string start_words[3] = {"the", "but", "with"}; + const string start_words[] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"}; vector tags; @@ -62,15 +62,17 @@ string WordUtils::create_tag(const unordered_map outer_distribution(0, start_words->length() - 1); + const string lol = start_words[outer_distribution(generator)]; + vector temp_words = predict_next_word(start_words[outer_distribution(generator)], word_frequencies, predict_num); uniform_int_distribution outer_2_distribution(0, temp_words.size() - 1); + tags.push_back(lol); tags.push_back(temp_words[outer_2_distribution(generator)]); // Words inside the

tag - for (unsigned short j = 0; j < 25; j++) - { + for (unsigned short j = 0; j < 25; j++) { temp_words = predict_next_word(tags[j], word_frequencies, predict_num); uniform_int_distribution inner_distribution(0, temp_words.size() - 1); @@ -80,8 +82,8 @@ string WordUtils::create_tag(const unordered_map>& word_frequencies, const unsigned long hash) { - constexpr unsigned char predict_num = 5; + constexpr unsigned char predict_num = 10; - const string start_words[10] = {"the", "but", "with", "all", "over", "and", "our", "as", "a", "to"}; + const string start_words[9] = {"the", "but", "with", "all", "over", "and", "as", "a", "to"}; vector tags; minstd_rand generator(hash); - uniform_int_distribution outer_distribution(0, start_words->length() - 1); + uniform_int_distribution outer_distribution(0, start_words->size() - 1); - vector temp_words = predict_next_word(start_words[outer_distribution(generator)], word_frequencies, predict_num); + const string lol = start_words[outer_distribution(generator)]; + vector temp_words = predict_next_word(lol, word_frequencies, predict_num); uniform_int_distribution outer_2_distribution(0, temp_words.size() - 1); + tags.push_back(lol); tags.push_back(temp_words[outer_2_distribution(generator)]); // Words inside the

tag - for (unsigned short j = 0; j < 7; j++) - { + for (unsigned short j = 0; j < 7; j++) { temp_words = predict_next_word(tags[j], word_frequencies, predict_num); uniform_int_distribution inner_distribution(0, temp_words.size() - 1); @@ -120,8 +123,8 @@ string WordUtils::create_link(const unordered_map WordUtils::split_string(const string& input, data_type type) { +vector WordUtils::split_string(const string& input, const data_type type) { vector data; size_t start = 0; size_t end = 0; - // Create a string from the delimiters array string delimiter_string; - if (type == data_type::HTML) { + if (type == HTML) { delimiter_string = " .,!?;:()\n\r\t"; } - else if (type == data_type::CSS) { + else if (type == CSS) { delimiter_string = "}\n\r\t"; } - + + else if (type == TEXT) { + delimiter_string = "\n"; + } + + else if (type == IP) { + delimiter_string = "."; + } while ((end = input.find_first_of(delimiter_string, start)) != string::npos) { if (end > start) { @@ -209,8 +217,22 @@ string WordUtils::extract_url(const string& input) { return first_line.substr(method_end + 1, path_end - method_end - 1); } +string WordUtils::extract_user_agent(const string& input) { + if (input.empty()) return ""; + + vector lines = split_string(input, TEXT); + + const unsigned short first_line_end = lines[2].find('\n'); + if (first_line_end == string::npos) return ""; + + string first_line = lines[2].substr(12, first_line_end); + + return first_line; +} + string WordUtils::extract_image_name(const string& input) { const unsigned short first_line_end = input.find('f'); + if (first_line_end == string::npos) return ""; string first_line = input.substr(1, first_line_end); @@ -244,6 +266,7 @@ unsigned int WordUtils::hash_url(const string& input) { unsigned long WordUtils::djb2Hash(const string& str) { unsigned long hash = 5381; + for (const char c : str) { hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ } diff --git a/src/main.cpp b/src/main.cpp index 4e6c1bb..6a0bb55 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -7,6 +7,7 @@ #include "../include/ConcurrentQueue.h" #include "../include/TrackerUtils.h" #include "../include/Track.h" +#include "../include/MetricsExporter.h" using namespace std; @@ -23,9 +24,14 @@ int main(int argc, const char* argv[]) { return 0; }*/ - auto queue = std::make_shared>(); + cout << "lol" << endl; - thread(TrackerUtils::track, queue).detach(); + auto queue = std::make_shared>(); + auto metrics = std::make_shared>(); + + thread(TrackerUtils::track, queue, metrics).detach(); + + thread(MetricsExporter::serve, metrics).detach(); //argv[1] signal(SIGPIPE, SIG_IGN);