Skip to content

Commit

Permalink
update job API
Browse files Browse the repository at this point in the history
  • Loading branch information
Ilya Belyanskiy committed Jun 10, 2023
1 parent 0a26876 commit 30482ac
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 30 deletions.
56 changes: 28 additions & 28 deletions src/Laraue.Crawling.Crawler/BaseCrawlerJob.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ public abstract class BaseCrawlerJob<TModel, TLink, TState> : ICrawlerJob<TState
where TState : class, new()
{
private readonly ILogger<BaseCrawlerJob<TModel, TLink, TState>> _logger;

/// <inheritdoc />
public event Func<JobState<TState>, CancellationToken, Task>? OnStateUpdated;

/// <summary>
/// Initializes a new instance of <see cref="BaseCrawlerJob{TModel,TLink,TState}"/>.
Expand All @@ -31,32 +34,34 @@ public async Task<TimeSpan> ExecuteAsync(JobState<TState> jobState, Cancellation
var sessionStopwatch = new Stopwatch();
sessionStopwatch.Start();

await OnSessionStartAsync(jobState).ConfigureAwait(false);
await OnSessionStartAsync(jobState, stoppingToken).ConfigureAwait(false);

var pageStopwatch = new Stopwatch();
pageStopwatch.Start();

var link = await GetNextLinkAsync(jobState, stoppingToken).ConfigureAwait(false);
if (link == null)

while (true)
{
_logger.LogInformation("Crawling session finished for {Time}", sessionStopwatch.Elapsed);
var link = await GetNextLinkAsync(jobState, stoppingToken).ConfigureAwait(false);
if (link == null)
{
_logger.LogInformation("Crawling session finished for {Time}", sessionStopwatch.Elapsed);

await OnSessionFinishAsync(jobState).ConfigureAwait(false);
await OnSessionFinishAsync(jobState, stoppingToken).ConfigureAwait(false);

return GetTimeToWait();
}
return GetTimeToWait();
}

_logger.LogInformation("Page {Page} processing started", link);
_logger.LogInformation("Page {Page} processing started", link);

await ParseLinkAsync(link, stoppingToken).ConfigureAwait(false);
await UpdateStateAsync(jobState, stoppingToken).ConfigureAwait(false);
await ParseLinkAsync(link, jobState.JobData, stoppingToken).ConfigureAwait(false);

_logger.LogInformation(
"Page {Page} processing finished for {Time}",
link,
pageStopwatch.Elapsed);
_logger.LogInformation(
"Page {Page} processing finished for {Time}",
link,
pageStopwatch.Elapsed);

return TimeSpan.Zero;
stoppingToken.ThrowIfCancellationRequested();
}
}

/// <summary>
Expand All @@ -66,22 +71,15 @@ public async Task<TimeSpan> ExecuteAsync(JobState<TState> jobState, Cancellation
/// <param name="cancellationToken"></param>
/// <returns></returns>
protected abstract Task<TLink?> GetNextLinkAsync(JobState<TState> state, CancellationToken cancellationToken = default);

/// <summary>
/// Execute something after one link has been parsed. The job state can be updated here.
/// </summary>
/// <param name="state"></param>
/// <param name="cancellationToken"></param>
/// <returns></returns>
protected abstract Task UpdateStateAsync(JobState<TState> state, CancellationToken cancellationToken = default);


/// <summary>
/// The body of parsing.
/// </summary>
/// <param name="link"></param>
/// <param name="state"></param>
/// <param name="cancellationToken"></param>
/// <returns></returns>
protected abstract Task<TModel?> ParseLinkAsync(TLink link, CancellationToken cancellationToken = default);
protected abstract Task<TModel?> ParseLinkAsync(TLink link, TState state, CancellationToken cancellationToken = default);

/// <summary>
/// Return how long to wait before the next crawling session.
Expand All @@ -93,13 +91,15 @@ public async Task<TimeSpan> ExecuteAsync(JobState<TState> jobState, Cancellation
/// Do something when the crawling session has started.
/// </summary>
/// <param name="state"></param>
/// <param name="cancellationToken"></param>
/// <returns></returns>
protected abstract Task OnSessionStartAsync(JobState<TState> state);
protected abstract Task OnSessionStartAsync(JobState<TState> state, CancellationToken cancellationToken = default);

/// <summary>
/// Do something when the crawling session has finished.
/// </summary>
/// <param name="state"></param>
/// <param name="cancellationToken"></param>
/// <returns></returns>
protected abstract Task OnSessionFinishAsync(JobState<TState> state);
protected abstract Task OnSessionFinishAsync(JobState<TState> state, CancellationToken cancellationToken = default);
}
4 changes: 2 additions & 2 deletions src/Laraue.Crawling.Crawler/Laraue.Crawling.Crawler.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Laraue.Core.Extensions.Hosting" Version="7.2.0" />
<PackageReference Include="Laraue.Core.Extensions.Hosting" Version="7.2.1" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="7.0.0" />
<PackageReference Include="Laraue.Core.Extensions.Hosting.EfCore" Version="7.2.0" />
<PackageReference Include="Laraue.Core.Extensions.Hosting.EfCore" Version="7.2.1" />
</ItemGroup>

<ItemGroup>
Expand Down

0 comments on commit 30482ac

Please sign in to comment.