This repository has been archived by the owner on Aug 14, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
UserTweetsRequest.kt
161 lines (146 loc) · 6.4 KB
/
UserTweetsRequest.kt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
package io.github.yamin8000.twitterscrapper.helpers
import io.github.yamin8000.twitterscrapper.helpers.UserInfoHelper.getUser
import io.github.yamin8000.twitterscrapper.model.Tweet
import io.github.yamin8000.twitterscrapper.model.TweetStats
import io.github.yamin8000.twitterscrapper.model.TweetsPage
import io.github.yamin8000.twitterscrapper.util.Constants.DEFAULT_TWEETS_LIMIT
import io.github.yamin8000.twitterscrapper.util.Constants.FAILED_REQUEST_DELAY
import io.github.yamin8000.twitterscrapper.util.Constants.instances
import io.github.yamin8000.twitterscrapper.util.Utility.sanitizeNum
import io.github.yamin8000.twitterscrapper.util.Utility.sanitizeUsername
import io.github.yamin8000.twitterscrapper.web.retryingGet
import kotlinx.coroutines.*
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import kotlin.random.Random
import kotlin.random.nextLong
/**
 * Scrapes the timeline pages of a single user and converts them into [Tweet] objects.
 *
 * @param username account whose timeline is requested; it is passed through
 * `sanitizeUsername()` before being placed in the request path.
 * @param limit maximum number of tweets returned by [get].
 */
class UserTweetsRequest(
    private val username: String,
    private val limit: Int = DEFAULT_TWEETS_LIMIT
) {
    /**
     * Downloads timeline pages one after another until either no further cursor is
     * found on a page or at least [limit] tweets have been collected.
     *
     * The first page is always requested, even when [limit] is zero.
     *
     * @return at most [limit] tweets, in the order they were parsed.
     * @throws NullPointerException when the server answers with HTTP 404.
     * @throws Exception when no response is obtained or any other unsuccessful
     * status (except 503, which is retried) is returned.
     */
    suspend fun get(): List<Tweet> {
        val tweets = mutableListOf<Tweet>()
        var cursor = ""
        do {
            val page = getUserTweetsPage(cursor)
            tweets.addAll(page.tweets)
            // A missing or blank cursor means there is no "show more" link to follow.
            cursor = page.cursor.orEmpty()
        } while (cursor.isNotBlank() && tweets.size < limit)
        return tweets.take(limit)
    }

    /**
     * Requests one timeline page and parses it.
     *
     * The response is read and closed inside the IO context before anything else
     * happens, so a failed request does not keep its connection open while an error
     * is handled or a retry is delayed.
     *
     * NOTE(review): [cursor] is the raw `href` taken from the "show more" link by
     * [parseCursor]; confirm that it does not already contain a `?cursor=` prefix.
     *
     * @param cursor pagination cursor appended as the `cursor` query parameter.
     * @param delayTime time in milliseconds to wait before sending the request.
     */
    private suspend fun getUserTweetsPage(
        cursor: String? = "",
        delayTime: Long = 0
    ): TweetsPage {
        delay(delayTime)
        val (httpCode, html) = withContext(Dispatchers.IO) {
            val response = retryingGet("${username.sanitizeUsername()}?cursor=$cursor")
                ?: throw Exception("Failed to retrieve tweets page for $username")
            // Assumes the response is an OkHttp-style Closeable; `use` releases it on
            // every path, including unsuccessful status codes whose body is never read.
            response.use { it.code to (if (it.isSuccessful) it.body.string() else null) }
        }
        return if (html != null) parseUserTweetsPage(html) else handleUserTweetsPageError(cursor, httpCode)
    }

    /**
     * Decides what to do with an unsuccessful HTTP status.
     *
     * - 404: the user is reported as not found (a [NullPointerException] is thrown;
     *   the type is kept for compatibility with existing callers).
     * - 503: the same page is requested again after a random delay drawn from
     *   `FAILED_REQUEST_DELAY`. There is no upper bound on the number of retries.
     * - anything else: a generic [Exception] is thrown.
     */
    private suspend fun handleUserTweetsPageError(
        cursor: String? = "",
        httpCode: Int
    ): TweetsPage {
        return when (httpCode) {
            404 -> throw NullPointerException("$username not found")
            503 -> getUserTweetsPage(cursor, Random.nextLong(FAILED_REQUEST_DELAY))
            else -> throw Exception("Failed to retrieve tweets page for $username")
        }
    }

    /**
     * Parses the HTML of one timeline page into its tweets and the cursor of the
     * following page (if any).
     */
    private suspend fun parseUserTweetsPage(
        html: String
    ): TweetsPage {
        val doc = Jsoup.parse(html)
        val cursor = parseCursor(doc)
        val timeline = doc.selectFirst("div[class^=timeline-container] > div[class^=timeline]")
        val tweets = withContext(Dispatchers.IO) { handleUserTweetsParsing(timeline) }
        return TweetsPage(tweets, cursor)
    }

    /** Returns the `href` of the "show more" link, or `null` when the page has none. */
    private fun parseCursor(doc: Document) = doc.selectFirst("div[class^=show-more] a")?.attr("href")

    /** Parses [timeline] when it is present; a missing timeline yields an empty list. */
    private suspend fun handleUserTweetsParsing(timeline: Element?): List<Tweet> {
        return if (timeline != null) parseUserTweets(timeline) else listOf()
    }

    /**
     * Walks the direct children of the timeline element.
     *
     * A child whose class starts with `timeline-item` is a single tweet; a child
     * whose class starts with `thread-line` groups several tweets, each of which is
     * parsed individually. All other children are ignored.
     */
    private suspend fun parseUserTweets(
        timeline: Element
    ): List<Tweet> {
        return buildList {
            for (child in timeline.children()) {
                val htmlClass = child.className()
                when {
                    htmlClass.startsWith("timeline-item") ->
                        getTimelineItem(child)?.let { add(it) }

                    htmlClass.startsWith("thread-line") ->
                        for (item in child.children()) {
                            getTimelineItem(item)?.let { add(it) }
                        }
                }
            }
        }
    }

    /**
     * Builds a [Tweet] from one timeline item element.
     *
     * The user is looked up once and reused for `originalTweeter` when the item
     * carries a retweet header, instead of performing the same lookup twice.
     *
     * @return the parsed tweet, or `null` when [tweet] is `null`.
     */
    private suspend fun getTimelineItem(
        tweet: Element?
    ): Tweet? {
        if (tweet == null) return null

        val thread = tweet.selectFirst("a[class^=show-thread]")
        val retweet = tweet.selectFirst("div[class^=retweet-header]")
        val username = getTweetUsername(tweet)
        val user = username?.let { getUser(it) }
        // First configured instance without its last character
        // (presumably a trailing '/' -- confirm against Constants.instances).
        val baseUrl = instances.first().dropLast(1)

        return Tweet(
            content = tweet.selectFirst("div[class^=tweet-content]")?.text() ?: "",
            date = getTweetDate(tweet),
            link = "$baseUrl${getTweetLink(tweet)}",
            user = user,
            stats = getTweetStats(tweet),
            isRetweet = retweet != null,
            isThreaded = thread != null,
            isPinned = tweet.selectFirst("div[class^=pinned]") != null,
            replies = listOf(),
            originalTweeter = if (retweet != null) user else null,
            quote = getQuotedTweet(tweet.selectFirst("div[class^=quote]")),
            // NOTE(review): without a show-thread link this is just the base URL -- confirm intended.
            thread = "$baseUrl${thread?.attr("href") ?: ""}"
        )
    }

    /**
     * Builds a [Tweet] for a quoted tweet element. Stats are left at their defaults
     * and the retweet/thread/pinned flags are always `false`.
     *
     * @return the quoted tweet, or `null` when [quote] is `null`.
     */
    private suspend fun getQuotedTweet(
        quote: Element?
    ): Tweet? {
        if (quote == null) return null

        val username = getTweetUsername(quote)
        return Tweet(
            content = quote.selectFirst("div[class^=quote-text]")?.text() ?: "",
            date = getTweetDate(quote),
            link = quote.selectFirst("a[class^=quote-link]")?.attr("href") ?: "",
            user = username?.let { getUser(it) },
            stats = TweetStats(),
            isRetweet = false,
            isThreaded = false,
            isPinned = false,
        )
    }

    /** Sanitized text of the username link inside [tweet], or `null` when there is no such link. */
    private fun getTweetUsername(tweet: Element) = tweet.selectFirst("a[class^=username]")?.text()?.sanitizeUsername()

    /** `title` attribute of the tweet date link, or an empty string when it is missing. */
    private fun getTweetDate(tweet: Element) = tweet.selectFirst("span[class^=tweet-date] a")?.attr("title") ?: ""

    /** `href` of the tweet link, or an empty string when it is missing. */
    private fun getTweetLink(tweet: Element) = tweet.selectFirst("a[class^=tweet-link]")?.attr("href") ?: ""

    /**
     * Reads the reply/retweet/quote/like counters of [tweet].
     *
     * Each child of the stats container is matched by the class name of the first
     * element inside its icon container; unrecognised icons are skipped, so the
     * corresponding counters keep their [TweetStats] defaults.
     */
    private fun getTweetStats(
        tweet: Element
    ): TweetStats {
        val stats = TweetStats()
        val rawStats = tweet.selectFirst("div[class^=tweet-stat]") ?: return stats
        for (stat in rawStats.children()) {
            val icon = stat.selectFirst("div[class^=icon-container]")
            val value = icon?.text().sanitizeNum()
            when (icon?.children()?.firstOrNull()?.className() ?: "") {
                "icon-comment" -> stats.replies = value
                "icon-retweet" -> stats.retweets = value
                "icon-quote" -> stats.quotes = value
                "icon-heart" -> stats.likes = value
            }
        }
        return stats
    }
}