-
Notifications
You must be signed in to change notification settings - Fork 0
/
HTMLClassExtractor.cs
83 lines (67 loc) · 2.19 KB
/
HTMLClassExtractor.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
using System;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
namespace CssScraper
{
/// <summary>
/// Reads an HTML file and collects the class names found in its
/// <c>class="..."</c> / <c>class='...'</c> attributes.
/// </summary>
internal class HTMLClassExtractor
{
    /// <summary>
    /// Matches a <c>class</c> attribute whose value is wrapped in double quotes
    /// (captured in group 1) or in single quotes (captured in group 2).
    /// Built once and reused across calls.
    /// </summary>
    private static readonly Regex ClassAttributePattern =
        new Regex(@"class\s*=\s*(?:""([^""]*)""|'([^']*)')");

    /// <summary>
    /// Characters that separate class tokens inside an attribute value:
    /// space, tab, line feed, carriage return and form feed.
    /// </summary>
    private static readonly char[] ClassSeparators = { ' ', '\t', '\n', '\r', '\f' };

    private string inputFile;
    private string outputFile;

    /// <summary>Creates an extractor with empty input and output paths.</summary>
    public HTMLClassExtractor()
    {
        this.inputFile = this.outputFile = "";
    }

    /// <summary>Creates an extractor for the given paths.</summary>
    /// <param name="inputFile">Path of the HTML file to read.</param>
    /// <param name="outputFile">Output path; stored only, not used by this class.</param>
    public HTMLClassExtractor(string inputFile, string outputFile)
    {
        this.inputFile = inputFile;
        this.outputFile = outputFile;
    }

    /// <summary>Returns the path of the HTML file that will be read.</summary>
    public string getInputFile()
    {
        return this.inputFile;
    }

    /// <summary>Returns the stored output path.</summary>
    public string getOutputFile()
    {
        return this.outputFile;
    }

    /// <summary>Sets the path of the HTML file that will be read.</summary>
    /// <param name="inputFile">Path of the HTML file.</param>
    public void setInputFile(string inputFile)
    {
        this.inputFile = inputFile;
    }

    /// <summary>Sets the stored output path.</summary>
    /// <param name="outputFile">Output path.</param>
    public void setOutputFile(string outputFile)
    {
        this.outputFile = outputFile;
    }

    /// <summary>
    /// Reads the input file and returns every class name found in its
    /// <c>class</c> attributes, in order of appearance. Duplicates are kept.
    /// </summary>
    /// <returns>
    /// The class names found. If reading or matching fails, the error message is
    /// written to the console and the names collected so far are returned.
    /// </returns>
    /// <remarks>
    /// Exceptions thrown while opening the file (for example a missing file or an
    /// empty path) are not caught here and propagate to the caller.
    /// </remarks>
    public List<string> ExtractClasses()
    {
        List<string> classNames = new List<string>();

        // Opened outside the try block on purpose: open failures keep propagating.
        StreamReader reader = new StreamReader(inputFile);
        try
        {
            string htmlContent;

            // Dispose the reader even when ReadToEnd throws, and release the file
            // before the matching work starts.
            using (reader)
            {
                htmlContent = reader.ReadToEnd();
            }

            foreach (Match match in ClassAttributePattern.Matches(htmlContent))
            {
                // Exactly one of the two capture groups takes part in a match.
                Group doubleQuoted = match.Groups[1];
                string classValue = doubleQuoted.Success
                    ? doubleQuoted.Value
                    : match.Groups[2].Value;

                // Class tokens may be separated by any whitespace, not only spaces.
                classNames.AddRange(
                    classValue.Split(ClassSeparators, StringSplitOptions.RemoveEmptyEntries));
            }
        }
        catch (Exception e)
        {
            Console.WriteLine($"Error occur : {e.Message}");
        }

        return classNames;
    }
}
}