From 5de31611eb887169bc0b45212c3d4e33ac3f7d8b Mon Sep 17 00:00:00 2001 From: Timothy Stepanski Date: Tue, 8 Sep 2015 17:29:00 -0400 Subject: [PATCH] Initial checkin --- .gitattributes | 63 +++++++++++++ SteamPageParser.sln | 28 ++++++ SteamPageParser/AppPackage.cs | 21 +++++ SteamPageParser/InvalidAppException.cs | 14 +++ SteamPageParser/Navigator.cs | 88 ++++++++++++++++++ SteamPageParser/Parser.cs | 90 +++++++++++++++++++ SteamPageParser/Properties/AssemblyInfo.cs | 36 ++++++++ SteamPageParser/SteamApp.cs | 34 +++++++ SteamPageParser/SteamPageParser.csproj | 65 ++++++++++++++ SteamPageParser/packages.config | 4 + SteamPageParserRunner/App.config | 6 ++ SteamPageParserRunner/Program.cs | 31 +++++++ .../Properties/AssemblyInfo.cs | 36 ++++++++ .../SteamPageParserRunner.csproj | 66 ++++++++++++++ 14 files changed, 582 insertions(+) create mode 100644 .gitattributes create mode 100644 SteamPageParser.sln create mode 100644 SteamPageParser/AppPackage.cs create mode 100644 SteamPageParser/InvalidAppException.cs create mode 100644 SteamPageParser/Navigator.cs create mode 100644 SteamPageParser/Parser.cs create mode 100644 SteamPageParser/Properties/AssemblyInfo.cs create mode 100644 SteamPageParser/SteamApp.cs create mode 100644 SteamPageParser/SteamPageParser.csproj create mode 100644 SteamPageParser/packages.config create mode 100644 SteamPageParserRunner/App.config create mode 100644 SteamPageParserRunner/Program.cs create mode 100644 SteamPageParserRunner/Properties/AssemblyInfo.cs create mode 100644 SteamPageParserRunner/SteamPageParserRunner.csproj diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..1ff0c42 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,63 @@ +############################################################################### +# Set default behavior to automatically normalize line endings. 
+############################################################################### +* text=auto + +############################################################################### +# Set default behavior for command prompt diff. +# +# This is needed for earlier builds of msysgit that do not have it on by +# default for csharp files. +# Note: This is only used by command line +############################################################################### +#*.cs diff=csharp + +############################################################################### +# Set the merge driver for project and solution files +# +# Merging from the command prompt will add diff markers to the files if there +# are conflicts (Merging from VS is not affected by the settings below, in VS +# the diff markers are never inserted). Diff markers may cause the following +# file extensions to fail to load in VS. An alternative would be to treat +# these files as binary and thus will always conflict and require user +# intervention with every merge. To do so, just uncomment the entries below +############################################################################### +#*.sln merge=binary +#*.csproj merge=binary +#*.vbproj merge=binary +#*.vcxproj merge=binary +#*.vcproj merge=binary +#*.dbproj merge=binary +#*.fsproj merge=binary +#*.lsproj merge=binary +#*.wixproj merge=binary +#*.modelproj merge=binary +#*.sqlproj merge=binary +#*.wwaproj merge=binary + +############################################################################### +# behavior for image files +# +# image files are treated as binary by default. +############################################################################### +#*.jpg binary +#*.png binary +#*.gif binary + +############################################################################### +# diff behavior for common document formats +# +# Convert binary document formats to text before diffing them. This feature +# is only available from the command line. 
Turn it on by uncommenting the +# entries below. +############################################################################### +#*.doc diff=astextplain +#*.DOC diff=astextplain +#*.docx diff=astextplain +#*.DOCX diff=astextplain +#*.dot diff=astextplain +#*.DOT diff=astextplain +#*.pdf diff=astextplain +#*.PDF diff=astextplain +#*.rtf diff=astextplain +#*.RTF diff=astextplain diff --git a/SteamPageParser.sln b/SteamPageParser.sln new file mode 100644 index 0000000..63c87da --- /dev/null +++ b/SteamPageParser.sln @@ -0,0 +1,28 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.23107.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SteamPageParser", "SteamPageParser\SteamPageParser.csproj", "{4903B432-A0F7-4E1A-9068-2560E29BAE7C}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SteamPageParserRunner", "SteamPageParserRunner\SteamPageParserRunner.csproj", "{7F4EBFF3-90A6-4A5D-9BDA-DBE238A64A31}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {4903B432-A0F7-4E1A-9068-2560E29BAE7C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {4903B432-A0F7-4E1A-9068-2560E29BAE7C}.Debug|Any CPU.Build.0 = Debug|Any CPU + {4903B432-A0F7-4E1A-9068-2560E29BAE7C}.Release|Any CPU.ActiveCfg = Release|Any CPU + {4903B432-A0F7-4E1A-9068-2560E29BAE7C}.Release|Any CPU.Build.0 = Release|Any CPU + {7F4EBFF3-90A6-4A5D-9BDA-DBE238A64A31}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {7F4EBFF3-90A6-4A5D-9BDA-DBE238A64A31}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7F4EBFF3-90A6-4A5D-9BDA-DBE238A64A31}.Release|Any CPU.ActiveCfg = Release|Any CPU + {7F4EBFF3-90A6-4A5D-9BDA-DBE238A64A31}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + 
HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/SteamPageParser/AppPackage.cs b/SteamPageParser/AppPackage.cs
new file mode 100644
index 0000000..965ffd6
--- /dev/null
+++ b/SteamPageParser/AppPackage.cs
@@ -0,0 +1,37 @@
+namespace SteamPageParser
+{
+    /// <summary>
+    /// A purchasable package listed on a Steam app's store page.
+    /// </summary>
+    public class AppPackage
+    {
+        private AppPackage(SteamApp associatedSteamApp)
+        {
+            AssociatedSteamApp = associatedSteamApp;
+        }
+
+        /// <summary>Package title as parsed from the store page.</summary>
+        public string Title { get; internal set; }
+
+        /// <summary>Price before any discount.</summary>
+        public decimal OriginalPrice { get; internal set; }
+
+        /// <summary>Price currently charged.</summary>
+        public decimal CurrentPrice { get; internal set; }
+
+        /// <summary>
+        /// Fraction of the original price currently charged (1 means no discount).
+        /// Reported as 1 when the original price is zero, instead of dividing by zero.
+        /// </summary>
+        public decimal CurrentPricePercentage => OriginalPrice == 0m ? 1m : CurrentPrice/OriginalPrice;
+
+        /// <summary>Fraction taken off the original price (0 means no discount).</summary>
+        public decimal DiscountPercentage => 1 - CurrentPricePercentage;
+
+        /// <summary>The app this package was created for.</summary>
+        public SteamApp AssociatedSteamApp { get; }
+
+        /// <summary>Creates an empty package tied to <paramref name="associatedSteamApp"/>.</summary>
+        public static AppPackage NewAppPackage(SteamApp associatedSteamApp) => new AppPackage(associatedSteamApp);
+    }
+}
\ No newline at end of file
diff --git a/SteamPageParser/InvalidAppException.cs b/SteamPageParser/InvalidAppException.cs
new file mode 100644
index 0000000..0b2a6e1
--- /dev/null
+++ b/SteamPageParser/InvalidAppException.cs
@@ -0,0 +1,29 @@
+using System;
+
+namespace SteamPageParser
+{
+    /// <summary>
+    /// Thrown when a store page could not be fetched or parsed for an app ID.
+    /// </summary>
+    public class InvalidAppException : Exception
+    {
+        /// <summary>Creates the exception for <paramref name="appId"/>.</summary>
+        public InvalidAppException(int appId)
+            : this(appId, null)
+        {
+        }
+
+        /// <summary>
+        /// Creates the exception for <paramref name="appId"/>, keeping the
+        /// underlying failure as <see cref="Exception.InnerException"/>.
+        /// </summary>
+        public InvalidAppException(int appId, Exception innerException)
+            : base($"Steam app {appId} is not a valid app page.", innerException)
+        {
+            AppId = appId;
+        }
+
+        /// <summary>The app ID that was rejected.</summary>
+        public int AppId { get; }
+    }
+}
\ No newline at end of file
diff --git a/SteamPageParser/Navigator.cs b/SteamPageParser/Navigator.cs
new file mode 100644
index 0000000..5358985
--- /dev/null
+++ b/SteamPageParser/Navigator.cs
@@ -0,0 +1,104 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Net;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace SteamPageParser
+{
+    /// <summary>
+    /// Downloads Steam store pages by app ID and hands them to <see cref="Parser"/>.
+    /// </summary>
+    public static class Navigator
+    {
+        /// <summary>Base URL of an app's store page; the app ID is appended to it.</summary>
+        public static string SteamAppUrl => @"http://store.steampowered.com/app/";
+
+        private static int MaximumAppId => 1000000;
+        private static int MinimumAppId => 0;
+        private static int MaximumCoresToOccupy => 5;
+
+        /// <summary>
+        /// Fetches and parses every app ID from MinimumAppId (inclusive) to MaximumAppId
+        /// (exclusive) in parallel, leaving out IDs that could not be fetched or parsed.
+        /// </summary>
+        /// <returns>A parallel query over the apps that were parsed successfully.</returns>
+        public static ParallelQuery<SteamApp> GetSteamApps()
+        {
+            // Range() avoids building a million-element list up front, and calling the
+            // synchronous fetch directly avoids blocking worker threads on Task.Result.
+            return ParallelEnumerable
+                .Range(MinimumAppId, MaximumAppId - MinimumAppId)
+                .WithDegreeOfParallelism(MaximumCoresToOccupy)
+                .Select(appId => FetchSteamApp(appId))
+                .Where(sa => sa != null);
+        }
+
+        /// <summary>
+        /// Fetches and parses a single app on a thread-pool thread.
+        /// </summary>
+        /// <param name="appId">ID appended to <see cref="SteamAppUrl"/>.</param>
+        /// <returns>The parsed app, or <c>null</c> when fetching or parsing failed.</returns>
+        public static async Task<SteamApp> GetSteamApp(int appId)
+        {
+            return await Task.Run(() => FetchSteamApp(appId));
+        }
+
+        // Downloads and parses one store page. Failures are written to the debug output
+        // and reported as null so that one bad app ID does not abort a whole crawl.
+        private static SteamApp FetchSteamApp(int appId)
+        {
+            try
+            {
+                string html;
+
+                // Dispose the response even when reading the body throws.
+                using (var response = GetResponse(appId))
+                {
+                    html = GetHtml(appId, response);
+                }
+
+                return Parser.ParsePage(appId, html);
+            }
+            catch (Exception exception)
+            {
+                Debug.WriteLine($"App {appId}: {exception.Message}");
+
+                return null;
+            }
+        }
+
+        // Reads the response body as text. The caller owns and disposes the response.
+        private static string GetHtml(int appId, HttpWebResponse response)
+        {
+            if (response.StatusCode != HttpStatusCode.OK) throw new InvalidAppException(appId);
+
+            using (var receiveStream = response.GetResponseStream())
+            {
+                if (receiveStream == null) throw new InvalidAppException(appId);
+
+                // An empty charset would make Encoding.GetEncoding throw, so treat it like
+                // a missing one and let StreamReader pick its default encoding.
+                var encodingName = response.CharacterSet;
+
+                using (var readStream = string.IsNullOrWhiteSpace(encodingName)
+                    ? new StreamReader(receiveStream)
+                    : new StreamReader(receiveStream, Encoding.GetEncoding(encodingName)))
+                {
+                    return readStream.ReadToEnd();
+                }
+            }
+        }
+
+        // Sends the request for the app's store page and waits for the response.
+        private static HttpWebResponse GetResponse(int appId)
+        {
+            var request = (HttpWebRequest) WebRequest.Create($"{SteamAppUrl}{appId}/");
+
+            return (HttpWebResponse) request.GetResponse();
+        }
+    }
+}
\ No newline at end of file
diff --git a/SteamPageParser/Parser.cs b/SteamPageParser/Parser.cs
new file mode 100644
index 0000000..4ade93f
--- /dev/null
+++ b/SteamPageParser/Parser.cs
@@ -0,0 +1,119 @@
+using System;
+using System.Globalization;
+using System.Linq;
+using HtmlAgilityPack;
+
+namespace SteamPageParser
+{
+    /// <summary>
+    /// Extracts the title and purchasable packages from a Steam store page.
+    /// </summary>
+    public static class Parser
+    {
+        private static string AppTitleClass => "apphub_AppName";
+        private static string PackageClass => "game_area_purchase_game_wrapper";
+        private static string PackagePriceClass => "game_purchase_price price";
+        private static string PackageOriginalPriceClass => "discount_original_price";
+        private static string PackageDiscountPriceClass => "discount_final_price";
+        private static string PackageTitle => "h1";
+        private static string ThousandsSeparator => ",";
+        private static string CurrencySymbol => "$";
+
+        /// <summary>
+        /// Parses the store page HTML of one app.
+        /// </summary>
+        /// <param name="appId">ID of the app the page was requested for.</param>
+        /// <param name="html">Raw HTML of the store page.</param>
+        /// <returns>The app with its title and packages filled in.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="html"/> is null or whitespace.</exception>
+        /// <exception cref="InvalidAppException">
+        /// The title or package markup is missing, or any other failure occurred while
+        /// parsing; that failure is kept as the inner exception.
+        /// </exception>
+        public static SteamApp ParsePage(int appId, string html)
+        {
+            if (string.IsNullOrWhiteSpace(html)) throw new ArgumentNullException(nameof(html));
+
+            try
+            {
+                var app = SteamApp.NewSteamApp(appId, html);
+
+                var htmlDocument = new HtmlDocument();
+
+                // NOTE(review): presumably this normalises attribute quoting for the
+                // class lookups below - confirm it is still needed.
+                var htmlCleaned = html.Replace("\"", "'");
+
+                htmlDocument.LoadHtml(htmlCleaned);
+
+                var documentNode = htmlDocument.DocumentNode;
+
+                var titleNode = documentNode.SelectSingleNode($"//div[@class='{AppTitleClass}']");
+
+                if (titleNode == null) throw new InvalidAppException(appId);
+
+                app.Title = titleNode.InnerHtml.Trim();
+
+                var packageNodes = documentNode.SelectNodes($"//div[@class='{PackageClass}']");
+
+                // A page without any package wrapper is rejected, exactly as before;
+                // the check only replaces the NullReferenceException that used to signal it.
+                if (packageNodes == null) throw new InvalidAppException(appId);
+
+                foreach (var packageNode in packageNodes)
+                {
+                    AddPackage(app, packageNode);
+                }
+
+                return app;
+            }
+            catch (Exception exception) when (!(exception is InvalidAppException))
+            {
+                // Keep the cause so callers can tell a markup change from a bad price.
+                throw new InvalidAppException(appId, exception);
+            }
+        }
+
+        // Reads title and prices from one package wrapper. Every XPath starts with ".//"
+        // so it searches below packageNode; a leading "//" searches the whole document
+        // and would give every package the first title and price found on the page.
+        private static void AddPackage(SteamApp app, HtmlNode packageNode)
+        {
+            var package = app.AddNewPackage();
+
+            var packageTitleNode = packageNode.SelectSingleNode($".//{PackageTitle}");
+
+            package.Title = packageTitleNode.InnerHtml.Replace("Buy ", "").Trim();
+
+            var priceNode = packageNode.SelectSingleNode($".//div[@class='{PackagePriceClass}']");
+
+            if (priceNode != null)
+            {
+                // No discount markup: the single price is both current and original.
+                package.CurrentPrice = ParseNodeWithCurrencyToDecimal(priceNode);
+
+                package.OriginalPrice = package.CurrentPrice;
+            }
+            else
+            {
+                var originalPriceNode = packageNode.SelectSingleNode($".//div[@class='{PackageOriginalPriceClass}']");
+
+                package.OriginalPrice = ParseNodeWithCurrencyToDecimal(originalPriceNode);
+
+                var discountPriceNode = packageNode.SelectSingleNode($".//div[@class='{PackageDiscountPriceClass}']");
+
+                package.CurrentPrice = ParseNodeWithCurrencyToDecimal(discountPriceNode);
+            }
+        }
+
+        // Strips the currency symbol and thousands separators, then parses the rest.
+        private static decimal ParseNodeWithCurrencyToDecimal(HtmlNode node)
+        {
+            var stringValue = node.InnerHtml.Replace(CurrencySymbol, "").Replace(ThousandsSeparator, "");
+
+            // The separators above are fixed, so parse with the invariant culture
+            // instead of whatever culture the machine happens to run under.
+            return decimal.Parse(stringValue, NumberStyles.Number, CultureInfo.InvariantCulture);
+        }
+    }
+}
\ No newline at end of file
diff --git a/SteamPageParser/Properties/AssemblyInfo.cs b/SteamPageParser/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..ef79c6f
--- /dev/null
+++ b/SteamPageParser/Properties/AssemblyInfo.cs
@@ -0,0 +1,36 @@
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("SteamPageParser")]
+[assembly: AssemblyDescription("")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("SteamPageParser")]
+[assembly: AssemblyCopyright("Copyright © 2015")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("4903b432-a0f7-4e1a-9068-2560e29bae7c")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
diff --git a/SteamPageParser/SteamApp.cs b/SteamPageParser/SteamApp.cs
new file mode 100644
index 0000000..0842f00
--- /dev/null
+++ b/SteamPageParser/SteamApp.cs
@@ -0,0 +1,45 @@
+using System.Collections.Generic;
+
+namespace SteamPageParser
+{
+    /// <summary>
+    /// A Steam store app together with the packages added to it.
+    /// </summary>
+    public class SteamApp
+    {
+        private SteamApp(int appId, string html)
+        {
+            AppId = appId;
+            Html = html;
+
+            PackageList = new List<AppPackage>();
+        }
+
+        /// <summary>ID the app was created with.</summary>
+        public int AppId { get; }
+
+        /// <summary>HTML the app was created with.</summary>
+        public string Html { get; }
+
+        /// <summary>App title; set by code inside this assembly.</summary>
+        public string Title { get; internal set; }
+
+        private List<AppPackage> PackageList { get; }
+
+        /// <summary>Snapshot of the packages added so far; a new array on every call.</summary>
+        public AppPackage[] Packages => PackageList.ToArray();
+
+        // Creates a package tied to this app, stores it and returns it for the caller to fill in.
+        internal AppPackage AddNewPackage()
+        {
+            var newPackage = AppPackage.NewAppPackage(this);
+
+            PackageList.Add(newPackage);
+
+            return newPackage;
+        }
+
+        /// <summary>Creates an app with no title and no packages.</summary>
+        public static SteamApp NewSteamApp(int appId, string html) => new SteamApp(appId, html);
+    }
+}
\ No newline at end of file
diff --git 
a/SteamPageParser/SteamPageParser.csproj b/SteamPageParser/SteamPageParser.csproj new file mode 100644 index 0000000..3e18b3c --- /dev/null +++ b/SteamPageParser/SteamPageParser.csproj @@ -0,0 +1,65 @@ + + + + + Debug + AnyCPU + {4903B432-A0F7-4E1A-9068-2560E29BAE7C} + Library + Properties + SteamPageParser + SteamPageParser + v4.5.2 + 512 + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + + ..\packages\HtmlAgilityPack.1.4.9\lib\Net45\HtmlAgilityPack.dll + True + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/SteamPageParser/packages.config b/SteamPageParser/packages.config new file mode 100644 index 0000000..4d5c9d8 --- /dev/null +++ b/SteamPageParser/packages.config @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/SteamPageParserRunner/App.config b/SteamPageParserRunner/App.config new file mode 100644 index 0000000..88fa402 --- /dev/null +++ b/SteamPageParserRunner/App.config @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file
diff --git a/SteamPageParserRunner/Program.cs b/SteamPageParserRunner/Program.cs
new file mode 100644
index 0000000..58a1f1f
--- /dev/null
+++ b/SteamPageParserRunner/Program.cs
@@ -0,0 +1,43 @@
+using System;
+using System.Linq;
+using System.Text;
+using SteamPageParser;
+
+namespace SteamPageParserRunner
+{
+    /// <summary>
+    /// Console runner that prints every app the navigator manages to parse.
+    /// </summary>
+    public class Program
+    {
+        /// <summary>Prints all apps, then waits for a line of input before exiting.</summary>
+        public static void Main()
+        {
+            Navigator.GetSteamApps().ForAll(ShowApp);
+
+            Console.In.ReadLine();
+        }
+
+        // Writes one app and its packages. ForAll invokes this from several threads at
+        // once, so the text is assembled first and written with a single call; separate
+        // WriteLine calls would let the output of different apps interleave.
+        private static void ShowApp(SteamApp app)
+        {
+            var output = new StringBuilder();
+
+            output.AppendLine(app.Title);
+
+            foreach (var package in app.Packages)
+            {
+                output.AppendLine("Package:");
+                output.AppendLine($"\t{package.Title}");
+                output.AppendLine($"\t${package.CurrentPrice}");
+                output.AppendLine("------------");
+            }
+
+            output.AppendLine("-------------------------------------------------------------");
+
+            Console.Out.Write(output.ToString());
+        }
+    }
+}
\ No newline at end of file
diff --git 
a/SteamPageParserRunner/Properties/AssemblyInfo.cs b/SteamPageParserRunner/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..0fb3320 --- /dev/null +++ b/SteamPageParserRunner/Properties/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("SteamPageParserRunner")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("SteamPageParserRunner")] +[assembly: AssemblyCopyright("Copyright © 2015")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. 
+[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("7f4ebff3-90a6-4a5d-9bda-dbe238a64a31")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/SteamPageParserRunner/SteamPageParserRunner.csproj b/SteamPageParserRunner/SteamPageParserRunner.csproj new file mode 100644 index 0000000..ce22524 --- /dev/null +++ b/SteamPageParserRunner/SteamPageParserRunner.csproj @@ -0,0 +1,66 @@ + + + + + Debug + AnyCPU + {7F4EBFF3-90A6-4A5D-9BDA-DBE238A64A31} + Exe + Properties + SteamPageParserRunner + SteamPageParserRunner + v4.5.2 + 512 + true + + + AnyCPU + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + + + AnyCPU + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + + + + + + + + + + + + + + + + + + + {4903b432-a0f7-4e1a-9068-2560e29bae7c} + SteamPageParser + + + + + \ No newline at end of file