diff --git a/Backend/Backend.csproj b/Backend/Backend.csproj
index 5c26c51..95c5c1e 100644
--- a/Backend/Backend.csproj
+++ b/Backend/Backend.csproj
@@ -16,6 +16,7 @@
+
diff --git a/Backend/Handler/ContentFilter.cs b/Backend/Handler/ContentFilter.cs
index 8d78691..b6930b6 100644
--- a/Backend/Handler/ContentFilter.cs
+++ b/Backend/Handler/ContentFilter.cs
@@ -161,15 +161,15 @@ public class ContentFilter
for (int i = 0; i < ports.Length; i++)
{
- string? html = "";
-
if (ports[i] == 80)
{
if (string.IsNullOrWhiteSpace(url1)) continue;
-
+
try
{
- html = HttpClientHelper.GetHtml(url1, 80).GetAwaiter().GetResult();
+ (string, string) temp = HttpClientHelper.GetTitleAndDescription(url1, 80).GetAwaiter().GetResult();
+ title1 = temp.Item1;
+ description1 = temp.Item2;
}
catch
{
@@ -179,10 +179,12 @@ public class ContentFilter
else
{
if (string.IsNullOrWhiteSpace(url2)) continue;
-
+
try
{
- html = HttpClientHelper.GetHtml(url2, 443).GetAwaiter().GetResult();
+ (string, string) temp = HttpClientHelper.GetTitleAndDescription(url2, 443).GetAwaiter().GetResult(); // port-443 branch: fetch url2 (the URL checked by the guard above), not url1
+ title2 = temp.Item1;
+ description2 = temp.Item2;
}
catch
{
@@ -190,12 +192,6 @@ public class ContentFilter
}
}
- if (string.IsNullOrWhiteSpace(html)) continue;
-
- if (ports[i] == 80 && string.IsNullOrWhiteSpace(title1)) { FilterHelper.GetTitle(html, out title1); }
- if (ports[i] == 443 && string.IsNullOrWhiteSpace(title2)) { FilterHelper.GetTitle(html ,out title2); }
- if (ports[i] == 80 && string.IsNullOrWhiteSpace(description1)) { FilterHelper.GetDescription(html, out description1); }
- if (ports[i] == 443 && string.IsNullOrWhiteSpace(description2)) { FilterHelper.GetDescription(html, out description2); }
if (ports[i] == 80 && !robotsTxt1) { robotsTxt1 = HttpClientHelper.HasRobotsTxt(url1, 80).GetAwaiter().GetResult(); }
if (ports[i] == 443 && !robotsTxt2) { robotsTxt2 = HttpClientHelper.HasRobotsTxt(url2, 443).GetAwaiter().GetResult(); }
}
diff --git a/Backend/Handler/ThreadHandler.cs b/Backend/Handler/ThreadHandler.cs
index f92d076..784c270 100644
--- a/Backend/Handler/ThreadHandler.cs
+++ b/Backend/Handler/ThreadHandler.cs
@@ -59,7 +59,7 @@ public class ThreadHandler
{
Thread.Sleep(5000); // Let the database handler instantiate and warm up first.
- List wait = _ipScanner.Start(32);
+ List wait = _ipScanner.Start(128);
for (int i = 0; i < wait.Count; i++)
{
diff --git a/Backend/Helper/HttpClientHelper.cs b/Backend/Helper/HttpClientHelper.cs
index 25c597a..444c16c 100644
--- a/Backend/Helper/HttpClientHelper.cs
+++ b/Backend/Helper/HttpClientHelper.cs
@@ -1,10 +1,18 @@
+using System.Text.RegularExpressions;
+using HtmlAgilityPack;
+
namespace Backend.Helper;
-public static class HttpClientHelper
+public static partial class HttpClientHelper
{
+ // Reddit, for example, will block the GET request if you don't have a user agent.
private const string UserAgentHeader = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
+ private const string TitlePattern = "(.*)";
+ private const string DescriptionPattern = " GetHtml(string url, int port)
+ public static async Task<(string, string)> GetTitleAndDescription(string url, int port)
{
using HttpClient client = new();
@@ -29,15 +37,48 @@ public static class HttpClientHelper
}
catch
{
- return "";
+ return ("", "");
}
if (!response.IsSuccessStatusCode)
{
- return "";
+ return ("", "");
+ }
+
+ string html = await response.Content.ReadAsStringAsync();
+
+ int firstIndex = 0;
+ int lastIndex = 0;
+
+ if (html.Contains(StartHeadTag) && html.Contains(EndHeadTag))
+ {
+ firstIndex = html.IndexOf(StartHeadTag, StringComparison.Ordinal);
+ lastIndex = html.IndexOf(EndHeadTag, StringComparison.Ordinal);
}
- return await response.Content.ReadAsStringAsync();
+ string head = html.AsSpan().Slice(firstIndex, Math.Max(0, lastIndex - firstIndex)).ToString(); // Slice takes (start, length), not (start, end); clamp to 0 if the end tag precedes the start tag
+ html = "";
+
+ string title = "";
+ string description = "";
+
+ Regex titleRegex = TitleRegEx();
+ Match titleMatch = titleRegex.Match(head);
+
+ if (titleMatch.Success)
+ {
+ title = titleMatch.Groups[1].Value;
+ }
+
+ Regex descriptionRegex = DexcriptionRegEx();
+ Match descriptionMatch = descriptionRegex.Match(head);
+
+ if (descriptionMatch.Success)
+ {
+ description = descriptionMatch.Groups[1].Value;
+ }
+
+ return (title, description);
}
public static async Task HasRobotsTxt(string url, int port)
@@ -70,4 +111,9 @@ public static class HttpClientHelper
return response is not null && response.IsSuccessStatusCode;
}
+
+ [GeneratedRegex(TitlePattern)]
+ private static partial Regex TitleRegEx();
+ [GeneratedRegex(DescriptionPattern)]
+ private static partial Regex DexcriptionRegEx();
}
\ No newline at end of file
diff --git a/RSE.sln b/RSE.sln
index f528fd9..7851f8c 100644
--- a/RSE.sln
+++ b/RSE.sln
@@ -8,8 +8,6 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Manager", "Manager\Manager.
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Proxy", "Proxy\Proxy.csproj", "{55208481-5203-4B25-A20D-4EF644F76773}"
EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Shared", "Shared\Shared.csproj", "{DEB1411C-F45A-40DA-92F8-D9B9929DBA5B}"
-EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -32,9 +30,5 @@ Global
{55208481-5203-4B25-A20D-4EF644F76773}.Debug|Any CPU.Build.0 = Debug|Any CPU
{55208481-5203-4B25-A20D-4EF644F76773}.Release|Any CPU.ActiveCfg = Release|Any CPU
{55208481-5203-4B25-A20D-4EF644F76773}.Release|Any CPU.Build.0 = Release|Any CPU
- {DEB1411C-F45A-40DA-92F8-D9B9929DBA5B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {DEB1411C-F45A-40DA-92F8-D9B9929DBA5B}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {DEB1411C-F45A-40DA-92F8-D9B9929DBA5B}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {DEB1411C-F45A-40DA-92F8-D9B9929DBA5B}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal
diff --git a/RSE.sln.DotSettings.user b/RSE.sln.DotSettings.user
index a5706a8..37aa8f6 100644
--- a/RSE.sln.DotSettings.user
+++ b/RSE.sln.DotSettings.user
@@ -1,9 +1,11 @@
ForceIncluded
+ ForceIncluded
ForceIncluded
ForceIncluded
ForceIncluded
ForceIncluded
ForceIncluded
ForceIncluded
+ ForceIncluded
ForceIncluded
\ No newline at end of file
diff --git a/Shared/Class1.cs b/Shared/Class1.cs
deleted file mode 100644
index eb85986..0000000
--- a/Shared/Class1.cs
+++ /dev/null
@@ -1,5 +0,0 @@
-namespace Shared;
-
-public class Class1
-{
-}
\ No newline at end of file
diff --git a/Shared/Shared.csproj b/Shared/Shared.csproj
deleted file mode 100644
index 3a63532..0000000
--- a/Shared/Shared.csproj
+++ /dev/null
@@ -1,9 +0,0 @@
-
-
-
- net8.0
- enable
- enable
-
-
-