Skip to content

Commit

Permalink
move PuppeterSharp ParseAsync method
Browse files Browse the repository at this point in the history
  • Loading branch information
win7user10 committed May 27, 2024
1 parent 9554a7d commit 5c4c8f8
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
using Laraue.Crawling.Abstractions;
using PuppeteerSharp;

namespace Laraue.Crawling.Dynamic.PuppeterSharp.Extensions;

/// <summary>
/// Extensions that run a document schema parser against an opened browser page.
/// </summary>
public static class DocumentSchemaParserExtensions
{
    /// <summary>
    /// Selects the <c>body</c> element of the opened page and runs the parser
    /// with the passed schema on that element.
    /// </summary>
    /// <param name="parser">Parser that is run against the page's <c>body</c> element.</param>
    /// <param name="page">Opened page whose <c>body</c> element is queried.</param>
    /// <param name="schema">Compiled schema passed to the parser.</param>
    /// <typeparam name="TResult">Type of the crawling result produced by the schema.</typeparam>
    /// <returns>The non-null result returned by the parser.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="parser"/>, <paramref name="page"/> or <paramref name="schema"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// The page has no <c>body</c> element, or the parser returned <c>null</c>.
    /// </exception>
    public static async Task<TResult> ParseAsync<TResult>(
        this IDocumentSchemaParser<IElementHandle, HtmlSelector> parser,
        IPage page,
        ICompiledDocumentSchema<IElementHandle, HtmlSelector, TResult> schema)
    {
        ArgumentNullException.ThrowIfNull(parser);
        ArgumentNullException.ThrowIfNull(page);
        ArgumentNullException.ThrowIfNull(schema);

        var body = await page.QuerySelectorAsync("body")
            .ConfigureAwait(false);

        // Fail here, before the parser runs, so the "body not found" error is
        // reported for the condition it actually describes and the parser
        // never receives a null root element.
        if (body is null)
        {
            throw new InvalidOperationException("Tag <body> has not been found in the passed page");
        }

        // A null result is still surfaced as InvalidOperationException, as before,
        // but with a message that matches this condition.
        return await parser.RunAsync(schema, body).ConfigureAwait(false)
            ?? throw new InvalidOperationException("The parser returned no result for the passed page");
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ public static class ServiceCollectionExtensions
{
return serviceCollection
.AddSingleton<IBrowserFactory>(sp => new BrowserFactory(launchOptions, sp.GetRequiredService<ILoggerFactory>()))
.AddSingleton<IDocumentSchemaParser<IElementHandle, HtmlSelector>, PuppeterSharpParser>()
.AddSingleton<IPageParser, PageParser>();
.AddSingleton<IDocumentSchemaParser<IElementHandle, HtmlSelector>, PuppeterSharpParser>();
}
}

0 comments on commit 5c4c8f8

Please sign in to comment.