From a0eb21aada04756a75ab57ebf576bc5675baf331 Mon Sep 17 00:00:00 2001
From: Simon <63975668+Simyon264@users.noreply.github.com>
Date: Sun, 5 May 2024 00:38:16 +0200
Subject: [PATCH] Add ability to load sources which are not a nginx file listing.

---
 Server/Api/ReplayController.cs                |   2 +-
 Server/Program.cs                             |   3 +-
 Server/ReplayLoading/CaddyProvider.cs         |  74 +++++++++++
 Server/ReplayLoading/DummyProvider.cs         |  14 +++
 Server/ReplayLoading/NginxProvider.cs         |  63 ++++++++++
 Server/ReplayLoading/ReplayProvider.cs        |  26 ++++
 Server/ReplayLoading/ReplayProviderFactory.cs |  29 +++++
 Server/{ => ReplayParser}/ReplayParser.cs     | 118 +++++++-----------
 Server/appsettings.json                       |  35 ++++--
 9 files changed, 279 insertions(+), 85 deletions(-)
 create mode 100644 Server/ReplayLoading/CaddyProvider.cs
 create mode 100644 Server/ReplayLoading/DummyProvider.cs
 create mode 100644 Server/ReplayLoading/NginxProvider.cs
 create mode 100644 Server/ReplayLoading/ReplayProvider.cs
 create mode 100644 Server/ReplayLoading/ReplayProviderFactory.cs
 rename Server/{ => ReplayParser}/ReplayParser.cs (70%)

diff --git a/Server/Api/ReplayController.cs b/Server/Api/ReplayController.cs
index f761fe8..b9ff1f3 100644
--- a/Server/Api/ReplayController.cs
+++ b/Server/Api/ReplayController.cs
@@ -34,7 +34,7 @@ public async Task UploadReplay(IFormFile file)
         }
 
         var stream = file.OpenReadStream();
-        var replay = ReplayParser.ParseReplay(stream);
+        var replay = ReplayParser.ReplayParser.ParseReplay(stream);
         stream.Close();
 
         _context.Replays.Add(replay);
diff --git a/Server/Program.cs b/Server/Program.cs
index dd3a588..bc95a0f 100644
--- a/Server/Program.cs
+++ b/Server/Program.cs
@@ -6,6 +6,7 @@
 using Serilog.AspNetCore;
 using Server;
 using Server.Api;
+using Server.ReplayParser;
 
 Log.Logger = new LoggerConfiguration()
     .WriteTo.Console()
@@ -93,7 +94,7 @@
 // Run FetchReplays in a new thread.
 var tokens = new List();
-var URLs = builder.Configuration.GetSection("ReplayUrls").Get<string[]>();
+var URLs = builder.Configuration.GetSection("ReplayUrls").Get<StorageUrl[]>();
 if (URLs == null)
 {
     throw new Exception("No replay URLs found in appsettings.json. Please set ReplayUrls to an array of URLs.");
 }
diff --git a/Server/ReplayLoading/CaddyProvider.cs b/Server/ReplayLoading/CaddyProvider.cs
new file mode 100644
index 0000000..1bd9e22
--- /dev/null
+++ b/Server/ReplayLoading/CaddyProvider.cs
@@ -0,0 +1,74 @@
+using System.Text.Json;
+using System.Text.Json.Serialization;
+
+namespace Server.ReplayLoading;
+
+/// <summary>
+/// Replay provider for servers that answer a directory request sent with
+/// "Accept: application/json" with a JSON array describing the directory entries.
+/// </summary>
+[ReplayProviderName("caddy")]
+public class CaddyProvider : ReplayProvider
+{
+    /// <summary>
+    /// Downloads the JSON listing of <paramref name="directoryUrl"/>, passes every ".zip" entry that is not
+    /// older than the cut-off date to the replay queue and recurses into every entry flagged as a directory.
+    /// </summary>
+    public override async Task RetrieveFilesRecursive(string directoryUrl, CancellationToken token)
+    {
+        using var httpClient = ReplayParser.ReplayParser.CreateHttpClient();
+        httpClient.DefaultRequestHeaders.Add("Accept", "application/json");
+
+        var responseText = await httpClient.GetStringAsync(directoryUrl, token);
+        var response = JsonSerializer.Deserialize<CaddyResponse[]>(responseText);
+        if (response == null)
+        {
+            return;
+        }
+
+        foreach (var caddyResponse in response)
+        {
+            // Stop between entries once cancellation is requested, mirroring NginxProvider.
+            if (token.IsCancellationRequested)
+            {
+                return;
+            }
+
+            if (caddyResponse.Name.EndsWith(".zip", StringComparison.Ordinal))
+            {
+                if (caddyResponse.LastModified < ReplayParser.ReplayParser.CutOffDateTime)
+                {
+                    continue;
+                }
+
+                await ReplayParser.ReplayParser.AddReplayToQueue(directoryUrl + caddyResponse.Name);
+            }
+            else if (caddyResponse.IsDir)
+            {
+                await RetrieveFilesRecursive(directoryUrl + caddyResponse.Name, token);
+            }
+        }
+    }
+
+    /// <summary>
+    /// One entry of the JSON directory listing.
+    /// </summary>
+    internal class CaddyResponse
+    {
+        [JsonPropertyName("name")]
+        public string Name { get; set; } = string.Empty;
+        // long, not int: a size above int.MaxValue would make deserialization of the whole listing throw.
+        [JsonPropertyName("size")]
+        public long Size { get; set; }
+        [JsonPropertyName("url")]
+        public string Url { get; set; } = string.Empty;
+        [JsonPropertyName("mod_time")]
+        public DateTime LastModified { get; set; }
+        [JsonPropertyName("mode")]
+        public long Mode { get; set; }
+        [JsonPropertyName("is_dir")]
+        public bool IsDir { get; set; }
+        [JsonPropertyName("is_symlink")]
+        public bool IsSymlink { get; set; }
+    }
+}
\ No newline at end of file
diff --git a/Server/ReplayLoading/DummyProvider.cs b/Server/ReplayLoading/DummyProvider.cs
new file mode 100644
index 0000000..def5445
--- /dev/null
+++ b/Server/ReplayLoading/DummyProvider.cs
@@ -0,0 +1,14 @@
+namespace Server.ReplayLoading;
+
+/// <summary>
+/// A replay provider that does nothing.
+/// This will never add any replays to the queue. It is used to temporarily disable some sources.
+/// </summary>
+[ReplayProviderName("dummy")]
+public class DummyProvider : ReplayProvider
+{
+    public override Task RetrieveFilesRecursive(string directoryUrl, CancellationToken token)
+    {
+        return Task.CompletedTask;
+    }
+}
\ No newline at end of file
diff --git a/Server/ReplayLoading/NginxProvider.cs b/Server/ReplayLoading/NginxProvider.cs
new file mode 100644
index 0000000..3d42c6c
--- /dev/null
+++ b/Server/ReplayLoading/NginxProvider.cs
@@ -0,0 +1,63 @@
+using HtmlAgilityPack;
+using Serilog;
+
+namespace Server.ReplayLoading;
+
+/// <summary>
+/// Replay provider that crawls HTML directory listing pages by following their links.
+/// </summary>
+[ReplayProviderName("nginx")]
+public class NginxProvider : ReplayProvider
+{
+    /// <summary>
+    /// Downloads the page at <paramref name="directoryUrl"/>, recurses into every link ending in "/"
+    /// and passes every link ending in ".zip" to the replay queue.
+    /// </summary>
+    public override async Task RetrieveFilesRecursive(string directoryUrl, CancellationToken token)
+    {
+        Log.Information("Retrieving files from " + directoryUrl);
+        using var client = ReplayParser.ReplayParser.CreateHttpClient();
+        var htmlContent = await client.GetStringAsync(directoryUrl, token);
+        var document = new HtmlDocument();
+        document.LoadHtml(htmlContent);
+
+        var links = document.DocumentNode.SelectNodes("//a[@href]");
+        if (links == null)
+        {
+            Log.Information("No links found on " + directoryUrl + ".");
+            return;
+        }
+
+        foreach (var link in links)
+        {
+            if (token.IsCancellationRequested)
+            {
+                return;
+            }
+
+            var href = link.Attributes["href"].Value;
+
+            // Skip links that point back to the parent directory.
+            if (href.StartsWith("..", StringComparison.Ordinal))
+            {
+                continue;
+            }
+
+            // Resolve relative links against the directory being listed.
+            if (!Uri.TryCreate(href, UriKind.Absolute, out _))
+            {
+                href = new Uri(new Uri(directoryUrl), href).ToString();
+            }
+
+            if (href.EndsWith("/", StringComparison.Ordinal))
+            {
+                await RetrieveFilesRecursive(href, token);
+            }
+
+            if (href.EndsWith(".zip", StringComparison.Ordinal))
+            {
+                await ReplayParser.ReplayParser.AddReplayToQueue(href);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/Server/ReplayLoading/ReplayProvider.cs b/Server/ReplayLoading/ReplayProvider.cs
new file mode 100644
index 0000000..05b2fbb
--- /dev/null
+++ b/Server/ReplayLoading/ReplayProvider.cs
@@ -0,0 +1,26 @@
+namespace Server.ReplayLoading;
+
+/// <summary>
+/// Base class for the strategies that discover replay files at a storage URL.
+/// </summary>
+public abstract class ReplayProvider
+{
+    /// <summary>
+    /// Walks <paramref name="directoryUrl"/> and adds the replays the implementation finds to the replay queue.
+    /// </summary>
+    public abstract Task RetrieveFilesRecursive(string directoryUrl, CancellationToken token);
+}
+
+/// <summary>
+/// Assigns the name under which <see cref="ReplayProviderFactory"/> looks up a <see cref="ReplayProvider"/>.
+/// </summary>
+[AttributeUsage(AttributeTargets.Class)]
+public class ReplayProviderNameAttribute : Attribute
+{
+    public string Name { get; }
+
+    public ReplayProviderNameAttribute(string name)
+    {
+        Name = name;
+    }
+}
\ No newline at end of file
diff --git a/Server/ReplayLoading/ReplayProviderFactory.cs b/Server/ReplayLoading/ReplayProviderFactory.cs
new file mode 100644
index 0000000..e0f7e37
--- /dev/null
+++ b/Server/ReplayLoading/ReplayProviderFactory.cs
@@ -0,0 +1,29 @@
+using System.Reflection;
+
+namespace Server.ReplayLoading;
+
+/// <summary>
+/// Creates <see cref="ReplayProvider"/> instances from the name given in their <see cref="ReplayProviderNameAttribute"/>.
+/// </summary>
+public class ReplayProviderFactory
+{
+    /// <summary>
+    /// Returns a new instance of the type in this assembly whose provider name equals <paramref name="providerName"/>.
+    /// </summary>
+    /// <exception cref="ArgumentException">No type carries that name, or the type is not a <see cref="ReplayProvider"/>.</exception>
+    public static ReplayProvider GetProvider(string providerName)
+    {
+        var type = Assembly.GetExecutingAssembly().GetTypes().FirstOrDefault(t => t.GetCustomAttribute<ReplayProviderNameAttribute>()?.Name == providerName);
+        if (type == null)
+        {
+            throw new ArgumentException($"Invalid provider name '{providerName}'.", nameof(providerName));
+        }
+
+        if (!typeof(ReplayProvider).IsAssignableFrom(type))
+        {
+            throw new ArgumentException($"Invalid provider type '{type.FullName}' for provider name '{providerName}'.", nameof(providerName));
+        }
+
+        return (ReplayProvider) Activator.CreateInstance(type)!;
+    }
+}
\ No newline at end of file
diff --git a/Server/ReplayParser.cs b/Server/ReplayParser/ReplayParser.cs
similarity index 70%
rename from Server/ReplayParser.cs
rename to Server/ReplayParser/ReplayParser.cs
index caf723d..1a70eca 100644
--- a/Server/ReplayParser.cs
+++ b/Server/ReplayParser/ReplayParser.cs
@@ -1,18 +1,14 @@
-using System.Diagnostics;
-using System.Globalization;
+using System.Globalization;
 using System.IO.Compression;
-using System.Text.RegularExpressions;
-using HtmlAgilityPack;
 using Microsoft.EntityFrameworkCore;
-using Microsoft.EntityFrameworkCore.Query;
-using Microsoft.Extensions.Caching.Memory;
 using Serilog;
 using Server.Api;
+using Server.ReplayLoading;
 using Shared;
 using Shared.Models;
 using YamlDotNet.Serialization;
 
-namespace Server;
+namespace Server.ReplayParser;
 
 public static class ReplayParser
 {
@@ -139,7 +135,7 @@ public static async Task ConsumeQueue(CancellationToken token)
     /// <summary>
     /// Handles fetching replays from the remote storage.
     /// </summary>
-    public static async Task FetchReplays(CancellationToken token, string[] storageUrls)
+    public static async Task FetchReplays(CancellationToken token, StorageUrl[] storageUrls)
     {
         while (!token.IsCancellationRequested)
         {
@@ -148,7 +144,8 @@ public static async Task FetchReplays(CancellationToken token, string[] storageU
                 Log.Information("Fetching replays from " + storageUrl);
                 try
                 {
-                    await RetrieveFilesRecursive(storageUrl, token);
+                    var provider = ReplayProviderFactory.GetProvider(storageUrl.Provider);
+                    await provider.RetrieveFilesRecursive(storageUrl.Url, token);
                 }
                 catch (Exception e)
                 {
@@ -164,80 +161,35 @@ public static async Task FetchReplays(CancellationToken token, string[] storageU
             await Task.Delay(delay, token);
         }
     }
-
-    private static async Task RetrieveFilesRecursive(string directoryUrl, CancellationToken token)
+
+    /// <summary>
+    /// Adds <paramref name="replay"/> to the queue unless its file name carries a date before the cut-off,
+    /// it has already been parsed, or it is already queued.
+    /// </summary>
+    public static async Task AddReplayToQueue(string replay)
     {
-        try
+        // Use regex to check and retrieve the date from the file name.
+        var fileName = Path.GetFileName(replay);
+        var match = RegexList.ReplayRegex.Match(fileName);
+        if (match.Success)
         {
-            Log.Information("Retrieving files from " + directoryUrl);
-            var client = CreateHttpClient();
-            var htmlContent = await client.GetStringAsync(directoryUrl, token);
-            var document = new HtmlDocument();
-            document.LoadHtml(htmlContent);
-
-            var links = document.DocumentNode.SelectNodes("//a[@href]");
-            if (links == null)
+            var date = DateTime.ParseExact(match.Groups[1].Value, "yyyy_MM_dd-HH_mm", CultureInfo.InvariantCulture);
+            if (date < CutOffDateTime)
             {
-                Log.Information("No links found on " + directoryUrl + ".");
                 return;
             }
-
-            foreach (var link in links)
-            {
-                if (token.IsCancellationRequested)
-                {
-                    return;
-                }
-
-                var href = link.Attributes["href"].Value;
-
-                if (href.StartsWith("..", StringComparison.Ordinal))
-                {
-                    continue;
-                }
-
-                if (!Uri.TryCreate(href, UriKind.Absolute, out _))
-                {
-                    href = new Uri(new Uri(directoryUrl), href).ToString();
-                }
-
-                if (href.EndsWith("/", StringComparison.Ordinal))
-                {
-                    await RetrieveFilesRecursive(href, token);
-                }
-
-                if (href.EndsWith(".zip", StringComparison.Ordinal))
-                {
-                    // Use regex to check and retrieve the date from the file name.
-                    var fileName = Path.GetFileName(href);
-                    var match = RegexList.ReplayRegex.Match(fileName);
-                    if (match.Success)
-                    {
-                        var date = DateTime.ParseExact(match.Groups[1].Value, "yyyy_MM_dd-HH_mm", CultureInfo.InvariantCulture);
-                        if (date < CutOffDateTime)
-                        {
-                            continue;
-                        }
-
-                        // If it's already in the database, skip it.
-                        if (await IsReplayParsed(href))
-                        {
-                            continue;
-                        }
-                        Log.Information("Adding " + href + " to the queue.");
-                        // Check if it's already in the queue.
-                        if (!Queue.Contains(href))
-                        {
-                            Queue.Add(href);
-                        }
-                    }
-                }
-            }
         }
-        catch (Exception e)
+
+        // If it's already in the database, skip it.
+        if (await IsReplayParsed(replay))
         {
-            Log.Error(e, "Error while retrieving files from " + directoryUrl);
-            // We don't care about the exception, we just want to return the files we have.
+            return;
+        }
+        // Check if it's already in the queue; only log when the replay is actually added.
+        if (!Queue.Contains(replay))
+        {
+            Log.Information("Adding " + replay + " to the queue.");
+            Queue.Add(replay);
         }
     }
 
@@ -284,3 +236,17 @@ public static HttpClient CreateHttpClient()
         return client;
     }
 }
+
+/// <summary>
+/// One entry of the "ReplayUrls" configuration: a storage URL and the name of the provider used to crawl it.
+/// </summary>
+public class StorageUrl
+{
+    public string Url { get; set; } = string.Empty;
+    public string Provider { get; set; } = string.Empty;
+
+    public override string ToString()
+    {
+        return Url;
+    }
+}
\ No newline at end of file
diff --git a/Server/appsettings.json b/Server/appsettings.json
index e1b0391..8f748b4 100644
--- a/Server/appsettings.json
+++ b/Server/appsettings.json
@@ -14,12 +14,33 @@
     }
   },
   "ReplayUrls": [
-    "https://moon.spacestation14.com/replays/leviathan/",
-    "https://moon.spacestation14.com/replays/lizard/",
-    "https://moon.spacestation14.com/replays/miros/",
-    "https://moon.spacestation14.com/replays/salamander/",
-    "https://moon.spacestation14.com/replays/vulture/",
-    "https://replays.delta-v.org/apoapsis/",
-    "https://replays.delta-v.org/periapsis/"
+    {
+      "url": "https://moon.spacestation14.com/replays/leviathan/",
+      "provider": "nginx"
+    },
+    {
+      "url": "https://moon.spacestation14.com/replays/lizard/",
+      "provider": "nginx"
+    },
+    {
+      "url": "https://moon.spacestation14.com/replays/miros/",
+      "provider": "nginx"
+    },
+    {
+      "url": "https://moon.spacestation14.com/replays/salamander/",
+      "provider": "nginx"
+    },
+    {
+      "url": "https://moon.spacestation14.com/replays/vulture/",
+      "provider": "nginx"
+    },
+    {
+      "url": "https://replays.delta-v.org/apoapsis/",
+      "provider": "nginx"
+    },
+    {
+      "url": "https://replays.delta-v.org/periapsis/",
+      "provider": "nginx"
+    }
   ]
 }