diff --git a/.gitignore b/.gitignore index e274d0f..b84bcc2 100644 --- a/.gitignore +++ b/.gitignore @@ -91,4 +91,6 @@ dkms.conf *.avif CMakeFiles/ build/ -convert.sh \ No newline at end of file +convert.sh +Testing/ +*.log diff --git a/Makefile b/Makefile index a8a6589..e2f88a9 100644 --- a/Makefile +++ b/Makefile @@ -48,10 +48,10 @@ cmake_force: SHELL = /bin/sh # The CMake executable. -CMAKE_COMMAND = /usr/bin/cmake +CMAKE_COMMAND = "/home/skingging/Tar Apps/clion-2025.1.1/bin/cmake/linux/x64/bin/cmake" # The command to remove a file. -RM = /usr/bin/cmake -E rm -f +RM = "/home/skingging/Tar Apps/clion-2025.1.1/bin/cmake/linux/x64/bin/cmake" -E rm -f # Escaping for special characters. EQUALS = = @@ -78,7 +78,7 @@ edit_cache/fast: edit_cache # Special rule for the target rebuild_cache rebuild_cache: @$(CMAKE_COMMAND) -E cmake_echo_color "--switch=$(COLOR)" --cyan "Running CMake to regenerate build system..." - /usr/bin/cmake --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) + "/home/skingging/Tar Apps/clion-2025.1.1/bin/cmake/linux/x64/bin/cmake" --regenerate-during-build -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) .PHONY : rebuild_cache # Special rule for the target rebuild_cache @@ -177,6 +177,30 @@ src/ServerUtils.cpp.s: $(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/ServerUtils.cpp.s .PHONY : src/ServerUtils.cpp.s +src/TrackerUtils.o: src/TrackerUtils.cpp.o +.PHONY : src/TrackerUtils.o + +# target to build an object file +src/TrackerUtils.cpp.o: + $(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/TrackerUtils.cpp.o +.PHONY : src/TrackerUtils.cpp.o + +src/TrackerUtils.i: src/TrackerUtils.cpp.i +.PHONY : src/TrackerUtils.i + +# target to preprocess a source file +src/TrackerUtils.cpp.i: + $(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/TrackerUtils.cpp.i +.PHONY : src/TrackerUtils.cpp.i + +src/TrackerUtils.s: src/TrackerUtils.cpp.s +.PHONY : src/TrackerUtils.s + +# target to generate assembly for a file +src/TrackerUtils.cpp.s: + $(MAKE) $(MAKESILENT) -f CMakeFiles/tarpit.dir/build.make CMakeFiles/tarpit.dir/src/TrackerUtils.cpp.s +.PHONY : src/TrackerUtils.cpp.s + src/WordUtils.o: src/WordUtils.cpp.o .PHONY : src/WordUtils.o @@ -240,6 +264,9 @@ help: @echo "... src/ServerUtils.o" @echo "... src/ServerUtils.i" @echo "... src/ServerUtils.s" + @echo "... src/TrackerUtils.o" + @echo "... src/TrackerUtils.i" + @echo "... src/TrackerUtils.s" @echo "... src/WordUtils.o" @echo "... src/WordUtils.i" @echo "... src/WordUtils.s" diff --git a/include/ConcurrentQueue.h b/include/ConcurrentQueue.h new file mode 100644 index 0000000..c4b41eb --- /dev/null +++ b/include/ConcurrentQueue.h @@ -0,0 +1,54 @@ +#ifndef CONCURRENTQUEUE_H +#define CONCURRENTQUEUE_H + +#include +#include +#include + +using namespace std; + +template +class ConcurrentQueue { +private: + std::queue queue_; + mutable std::mutex mutex_; + std::condition_variable condition_; + +public: + // Apparently, if you have a mutex in a class, you can't copy or assign the class to any other class. + ConcurrentQueue() = default; + ConcurrentQueue(const ConcurrentQueue&) = delete; // Prevent copying + ConcurrentQueue& operator=(const ConcurrentQueue&) = delete; // Prevent assignment + + void push(T value) { + lock_guard lock(mutex_); + queue_.push(move(value)); + condition_.notify_one(); + } + + bool try_pop(T& value) { + lock_guard lock(mutex_); + if (queue_.empty()) { + return false; + } + value = move(queue_.front()); + queue_.pop(); + return true; + } + + T wait_and_pop() { + unique_lock lock(mutex_); + condition_.wait(lock, [this] { return !queue_.empty(); }); + T value = move(queue_.front()); + queue_.pop(); + + return value; + } + + bool empty() const { + lock_guard lock(mutex_); + return queue_.empty(); + } +}; + +#endif diff --git a/include/FileUtils.h b/include/FileUtils.h index f1e24fa..d653e50 100644 --- a/include/FileUtils.h +++ b/include/FileUtils.h @@ -10,6 +10,7 @@ struct FileUtils { static bool fileExists(const char *path); static vector open_image(const string& path); static vector get_image_list(const string& path); + static vector get_wordlists(const string& path); }; #endif \ No newline at end of file diff --git a/include/ServerUtils.h b/include/ServerUtils.h index d97ba4d..98b8c0e 100644 --- a/include/ServerUtils.h +++ b/include/ServerUtils.h @@ -4,12 +4,14 @@ #include #include #include "../include/DataType.h" +#include "../include/Track.h" +#include "../include/ConcurrentQueue.h" using namespace std; class ServerUtils { public: - static void serve(); + static void serve(shared_ptr> t_test); private: static void process_request(int client_fd); static void send_header(int client_fd, data_type type); @@ -17,7 +19,8 @@ class ServerUtils { static void send_chunked_css(int client_fd); static void send_data(int client_fd, const string& data); static void send_image(int client_fd, const string& path, image_type type); - static size_t send_all(int sockfd, const char* data, size_t length); + static size_t send_all(int client_fd, const char* data, size_t length); + static string get_ip(int client_fd); }; const string HTML_RESPONSE_HEADER = diff --git a/include/Track.h b/include/Track.h new file mode 100644 index 0000000..1d0594d --- /dev/null +++ b/include/Track.h @@ -0,0 +1,12 @@ +#ifndef TRACK_H +#define TRACK_H +#include + +using namespace std; + +struct Track { + string Ip; + string UserAgent; +}; + +#endif //TRACK_H diff --git a/include/TrackerUtils.h b/include/TrackerUtils.h new file mode 100644 index 0000000..ed06326 --- /dev/null +++ b/include/TrackerUtils.h @@ -0,0 +1,16 @@ +#ifndef TRACKERUTILS_H +#define TRACKERUTILS_H + +#include + +#include "../include/ConcurrentQueue.h" +#include "../include/Track.h" + +using namespace std; + +struct TrackerUtils { + static void track(const shared_ptr>& t_test); + static void print(unordered_map tracks); +}; + +#endif diff --git a/src/FileUtils.cpp b/src/FileUtils.cpp index b2a4afe..83bc79c 100644 --- a/src/FileUtils.cpp +++ b/src/FileUtils.cpp @@ -36,5 +36,18 @@ vector FileUtils::get_image_list(const string& path) { } } + return images; +} + +vector FileUtils::get_wordlists(const string& path) { + vector images; + const std::string ext(".txt"); + for (auto &p : filesystem::recursive_directory_iterator(path)) + { + if (p.path().extension() == ext) { + images.push_back(p.path().string()); + } + } + return images; } \ No newline at end of file diff --git a/src/ServerUtils.cpp b/src/ServerUtils.cpp index ed866c8..80a6fab 100644 --- a/src/ServerUtils.cpp +++ b/src/ServerUtils.cpp @@ -4,6 +4,7 @@ #include "../include/FileUtils.h" #include +#include #include #include #include @@ -13,15 +14,24 @@ #include #include #include +#include +#include + vector css; vector images; -unordered_map> word_frequencies; +vector>> all_lists; +shared_ptr> t_test; -void ServerUtils::serve() { - word_frequencies = WordUtils::load_data("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/wordlist/wordlist-Food.txt"); +void [[noreturn]] ServerUtils::serve(shared_ptr> test) { + t_test = std::move(test); css = WordUtils::load_css("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/style.css"); images = FileUtils::get_image_list("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/"); + const vector words = FileUtils::get_wordlists("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/wordlist/"); + + for (const auto& word : words) { + all_lists.push_back(WordUtils::load_data(word)); + } // server_fd is a file descriptor. const int server_fd = socket(AF_INET, SOCK_STREAM, 0); @@ -30,9 +40,13 @@ void ServerUtils::serve() { addr.sin_family = AF_INET; addr.sin_port = htons(8888); addr.sin_addr.s_addr = INADDR_ANY; - - bind(server_fd, reinterpret_cast(&addr), sizeof(addr)); - listen(server_fd, 15); + + if (bind(server_fd, reinterpret_cast(&addr), sizeof(addr)) != 0) { + cout << "Please wait for the pipe to close."; + return; + } + + listen(server_fd, 50); cout << "Server is running on http://localhost:8888 \n"; @@ -47,7 +61,7 @@ void ServerUtils::serve() { void ServerUtils::process_request(const int client_fd) { char buffer[1024]; - const int bytes_received = recv(client_fd, buffer, sizeof(buffer) - 1, 0); + const unsigned long bytes_received = recv(client_fd, buffer, sizeof(buffer) - 1, 0); string url; @@ -55,7 +69,12 @@ void ServerUtils::process_request(const int client_fd) { buffer[bytes_received] = '\0'; url = WordUtils::extract_url(string(buffer)); + + if (url.empty()) { + close(client_fd); + } } else { + close(client_fd); cerr << "AAAA \n"; } @@ -72,13 +91,17 @@ void ServerUtils::process_request(const int client_fd) { else if (WordUtils::contains_image(url)) { string p = "/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/content/images/"; p += WordUtils::extract_image_name(url); - cerr << p << endl; send_image(client_fd, p, image_type::AVIF); } else { - //unsigned int hash = WordUtils::hash_url(url); - //unsigned long hash2 = WordUtils::djb2Hash(url); + Track track; + track.Ip = get_ip(client_fd); + + // Testing only. + track.UserAgent = url; + t_test->push(track); + const unsigned long hash3 = WordUtils::fnv1aHash(url); send_header(client_fd, HTML); @@ -114,15 +137,17 @@ void ServerUtils::send_chunked_html(const int client_fd, const size_t hash) { minstd_rand generator(hash); uniform_int_distribution distribution_1(0, end - 1); uniform_int_distribution distribution_2(0, images.size() - 1); + uniform_int_distribution distribution_3(0, 8); const int link = distribution_1(generator); const int image = distribution_2(generator); + const int l = distribution_3(generator); while (itr < end) { - send_data(client_fd, WordUtils::create_tag(word_frequencies, hashes[itr])); + send_data(client_fd, WordUtils::create_tag(all_lists[l], hashes[itr])); if (itr == link) { - send_data(client_fd, WordUtils::create_link(word_frequencies, hash)); + send_data(client_fd, WordUtils::create_link(all_lists[l], hash)); } if (itr == link) { @@ -163,22 +188,23 @@ void ServerUtils::send_image(const int client_fd, const string& path, const imag oss << "Content-Length: " << image.size() << "\r\n"; oss << "\r\n"; - //send(client_fd, oss.str().c_str(), oss.str().size(), 0); - send_data(client_fd, oss.str()); + send(client_fd, oss.str().c_str(), oss.str().size(), 0); send(client_fd, reinterpret_cast(image.data()), image.size(), 0); //send_all(client_fd,reinterpret_cast(image.data()), image.size()); } -size_t ServerUtils::send_all(int sockfd, const char *data, const size_t length) { +size_t ServerUtils::send_all(const int client_fd, const char *data, const size_t length) { size_t total_sent = 0; while (total_sent < length) { - size_t sent = send(sockfd, data + total_sent, length - total_sent, 0); + const size_t sent = send(client_fd, data + total_sent, length - total_sent, 0); if (sent <= 0) { return sent; // Error or connection closed } + total_sent += sent; } + return total_sent; } @@ -187,9 +213,30 @@ void ServerUtils::send_data(const int client_fd, const string& data) { oss << hex << data.size() << "\r\n" << data << "\r\n"; - const int result = send(client_fd, oss.str().c_str(), oss.str().size(), 0); + send(client_fd, oss.str().c_str(), oss.str().size(), 0); +} - if (result == -1) { - return; +string ServerUtils::get_ip(const int client_fd) { + sockaddr_storage addr{}; + socklen_t len = sizeof(addr); + char ipstr[INET6_ADDRSTRLEN]; + + if (getpeername(client_fd, reinterpret_cast(&addr), &len) == -1) { + perror("getpeername"); + return ""; } + + if (addr.ss_family == AF_INET) { + // IPv4 + sockaddr_in* s = reinterpret_cast(&addr); + inet_ntop(AF_INET, &s->sin_addr, ipstr, sizeof(ipstr)); + } else if (addr.ss_family == AF_INET6) { + // IPv6 + sockaddr_in6* s = reinterpret_cast(&addr); + inet_ntop(AF_INET6, &s->sin6_addr, ipstr, sizeof(ipstr)); + } else { + return "Unknown address family"; + } + + return string(ipstr); } \ No newline at end of file diff --git a/src/TrackerUtils.cpp b/src/TrackerUtils.cpp new file mode 100644 index 0000000..bec2531 --- /dev/null +++ b/src/TrackerUtils.cpp @@ -0,0 +1,34 @@ +#include "../include/TrackerUtils.h" +#include "../include/ConcurrentQueue.h" +#include "../include/Track.h" +#include +#include +#include +#include +#include + +void TrackerUtils::track(const shared_ptr> &t_test) { + bool running = true; + unordered_map urls; + + while (running) { + auto [Ip, UserAgent] = t_test->wait_and_pop(); + + urls[UserAgent]++; + + system("clear"); + print(urls); + + if (Ip == "STOP") { + running = false; + } + } +} + +void TrackerUtils::print(unordered_map tracks) { + for (auto it = tracks.begin(); it != tracks.end(); ++it) { + cerr << it->first << ": " << it->second << endl; + } + + this_thread::sleep_for(chrono::milliseconds(250)); +} diff --git a/src/WordUtils.cpp b/src/WordUtils.cpp index 5d2ce8c..136ab4e 100644 --- a/src/WordUtils.cpp +++ b/src/WordUtils.cpp @@ -5,18 +5,17 @@ #include #include #include -#include #include -// We can just use a unordered_map> instead +// We can just use an unordered_map> instead unordered_map> WordUtils::load_data(const string& path) { vector data = split_string(load_file(path), data_type::HTML); unordered_map> word_frequencies = {}; for (long unsigned int i = 0; i + 1 < data.size(); i++) { - transform(data[i].begin(), data[i].end(), data[i].begin(), [](const unsigned char c){ return tolower(c); }); - transform(data[i+1].begin(), data[i+1].end(), data[i+1].begin(), [](const unsigned char c){ return tolower(c); }); + ranges::transform(data[i], data[i].begin(), [](const unsigned char c){ return tolower(c); }); + ranges::transform(data[i+1], data[i+1].begin(), [](const unsigned char c){ return tolower(c); }); word_frequencies[data[i]][data[i+1]]++; } @@ -27,7 +26,7 @@ vector WordUtils::load_css(const string& path) { return split_string(load_file(path), data_type::CSS); } -// We can just use a unordered_map> instead, since we're already sorting before counting. +// We can just use an unordered_map> instead, since we're already sorting before counting. vector WordUtils::predict_next_word(const string& input, const unordered_map>& word_frequencies, size_t count) { const auto it = word_frequencies.find(input); @@ -38,7 +37,7 @@ vector WordUtils::predict_next_word(const string& input, const unordered vector> sortedWords(nextWords.begin(), nextWords.end()); // Sort by frequency (descending) - sort(sortedWords.begin(), sortedWords.end(),[](const auto& a, const auto& b) { + ranges::sort(sortedWords,[](const auto& a, const auto& b) { return a.second > b.second; }); @@ -81,7 +80,7 @@ string WordUtils::create_tag(const unordered_map"; @@ -194,6 +193,8 @@ string WordUtils::load_file(const string& path) { } string WordUtils::extract_url(const string& input) { + if (input.empty()) return ""; + const unsigned short first_line_end = input.find('\n'); if (first_line_end == string::npos) return ""; @@ -209,7 +210,7 @@ string WordUtils::extract_url(const string& input) { } string WordUtils::extract_image_name(const string& input) { - const unsigned short first_line_end = input.find("f"); + const unsigned short first_line_end = input.find('f'); if (first_line_end == string::npos) return ""; string first_line = input.substr(1, first_line_end); @@ -222,12 +223,10 @@ bool WordUtils::contains_image(const string& input) { if (first_line_end == 65535) return false; - cerr << first_line_end; + string type = input.substr(first_line_end + 1, 5); - string type = input.substr(first_line_end, 3); - cerr << type << endl; + if (type == "avif") return true; - if (first_line_end != string::npos) return true; return false; } @@ -253,7 +252,7 @@ unsigned long WordUtils::djb2Hash(const string& str) { } unsigned long WordUtils::fnv1aHash(const std::string& str) { - const unsigned long prime = 16777619; // A commonly used prime + constexpr unsigned long prime = 16777619; // A commonly used prime unsigned long hash = 2166136261; // Initial prime value for (const char c : str) { diff --git a/src/main.cpp b/src/main.cpp index 92004e3..4e6c1bb 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,8 +1,12 @@ #include +#include #include "../include/FileUtils.h" #include "../include/WordUtils.h" #include "../include/ServerUtils.h" +#include "../include/ConcurrentQueue.h" +#include "../include/TrackerUtils.h" +#include "../include/Track.h" using namespace std; @@ -19,26 +23,13 @@ int main(int argc, const char* argv[]) { return 0; }*/ + auto queue = std::make_shared>(); - /*auto lol = WordUtils::load_data("/home/skingging/Documents/Projects/CPP/AI-Tarpit-Reimagined/wordlist/wordlist-Food.txt"); - string word; - - while (true) - { - cout << "Please input word: \n"; - cin >> word; - - vector words = WordUtils::predict_next_word(word, lol, 10); - - for (const auto& out : words) - { - cout << "Next word is: " << out << endl; - } - }*/ + thread(TrackerUtils::track, queue).detach(); //argv[1] signal(SIGPIPE, SIG_IGN); - ServerUtils::serve(); + ServerUtils::serve(queue); return 0; } \ No newline at end of file