2023-06-01 23:20:11 +03:00
|
|
|
package twitterscraper
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"strconv"
|
2024-08-01 17:39:06 +03:00
|
|
|
"strings"
|
2023-06-01 23:20:11 +03:00
|
|
|
)
|
|
|
|
|
|
2024-04-25 23:23:29 +03:00
|
|
|
type tweet struct {
|
|
|
|
|
Core struct {
|
2023-06-01 23:20:11 +03:00
|
|
|
UserResults struct {
|
|
|
|
|
Result struct {
|
|
|
|
|
IsBlueVerified bool `json:"is_blue_verified"`
|
|
|
|
|
Legacy legacyUser `json:"legacy"`
|
|
|
|
|
} `json:"result"`
|
|
|
|
|
} `json:"user_results"`
|
|
|
|
|
} `json:"core"`
|
|
|
|
|
Views struct {
|
|
|
|
|
Count string `json:"count"`
|
|
|
|
|
} `json:"views"`
|
|
|
|
|
NoteTweet struct {
|
|
|
|
|
NoteTweetResults struct {
|
|
|
|
|
Result struct {
|
|
|
|
|
Text string `json:"text"`
|
|
|
|
|
} `json:"result"`
|
|
|
|
|
} `json:"note_tweet_results"`
|
|
|
|
|
} `json:"note_tweet"`
|
|
|
|
|
QuotedStatusResult struct {
|
|
|
|
|
Result *result `json:"result"`
|
|
|
|
|
} `json:"quoted_status_result"`
|
|
|
|
|
Legacy legacyTweet `json:"legacy"`
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-25 23:23:29 +03:00
|
|
|
type result struct {
|
|
|
|
|
Typename string `json:"__typename"`
|
|
|
|
|
tweet
|
|
|
|
|
Tweet tweet `json:"tweet"`
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-01 23:20:11 +03:00
|
|
|
func (result *result) parse() *Tweet {
|
|
|
|
|
if result.NoteTweet.NoteTweetResults.Result.Text != "" {
|
|
|
|
|
result.Legacy.FullText = result.NoteTweet.NoteTweetResults.Result.Text
|
|
|
|
|
}
|
2024-04-25 23:48:41 +03:00
|
|
|
var legacy *legacyTweet = &result.Legacy
|
|
|
|
|
var user *legacyUser = &result.Core.UserResults.Result.Legacy
|
2024-04-25 23:23:29 +03:00
|
|
|
if result.Typename == "TweetWithVisibilityResults" {
|
2024-04-25 23:48:41 +03:00
|
|
|
legacy = &result.Tweet.Legacy
|
2024-04-25 23:23:29 +03:00
|
|
|
user = &result.Tweet.Core.UserResults.Result.Legacy
|
|
|
|
|
}
|
2024-04-25 23:48:41 +03:00
|
|
|
tw := parseLegacyTweet(user, legacy)
|
2023-07-03 14:57:29 +03:00
|
|
|
if tw == nil {
|
|
|
|
|
return nil
|
|
|
|
|
}
|
2023-06-01 23:20:11 +03:00
|
|
|
if tw.Views == 0 && result.Views.Count != "" {
|
|
|
|
|
tw.Views, _ = strconv.Atoi(result.Views.Count)
|
|
|
|
|
}
|
|
|
|
|
if result.QuotedStatusResult.Result != nil {
|
|
|
|
|
tw.QuotedStatus = result.QuotedStatusResult.Result.parse()
|
|
|
|
|
}
|
2024-11-08 21:57:56 +02:00
|
|
|
tw.HTML = expandURLs(tw.HTML, legacy.Entities.URLs, legacy.ExtendedEntities.Media)
|
|
|
|
|
tw.HTML = expandURLs(tw.Text, legacy.Entities.URLs, legacy.ExtendedEntities.Media)
|
2023-06-01 23:20:11 +03:00
|
|
|
return tw
|
|
|
|
|
}
|
|
|
|
|
|
2024-02-03 14:53:11 +01:00
|
|
|
type userResult struct {
|
|
|
|
|
Typename string `json:"__typename"`
|
|
|
|
|
ID string `json:"id"`
|
|
|
|
|
RestID string `json:"rest_id"`
|
|
|
|
|
AffiliatesHighlightedLabel struct{} `json:"affiliates_highlighted_label"`
|
|
|
|
|
HasGraduatedAccess bool `json:"has_graduated_access"`
|
|
|
|
|
IsBlueVerified bool `json:"is_blue_verified"`
|
|
|
|
|
ProfileImageShape string `json:"profile_image_shape"`
|
|
|
|
|
Legacy legacyUserV2 `json:"legacy"`
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (result *userResult) parse() Profile {
|
|
|
|
|
return parseProfileV2(*result)
|
|
|
|
|
}
|
|
|
|
|
|
2024-02-13 04:45:08 +03:00
|
|
|
type item struct {
|
2024-08-01 17:39:06 +03:00
|
|
|
EntryID string `json:"entryId"`
|
|
|
|
|
Item struct {
|
2024-02-13 04:45:08 +03:00
|
|
|
ItemContent struct {
|
2024-08-01 17:39:06 +03:00
|
|
|
ItemType string `json:"itemType"`
|
2024-02-13 04:45:08 +03:00
|
|
|
TweetDisplayType string `json:"tweetDisplayType"`
|
|
|
|
|
TweetResults struct {
|
|
|
|
|
Result result `json:"result"`
|
|
|
|
|
} `json:"tweet_results"`
|
2024-08-01 17:39:06 +03:00
|
|
|
CursorType string `json:"cursorType"`
|
|
|
|
|
Value string `json:"value"`
|
2024-02-13 04:45:08 +03:00
|
|
|
} `json:"itemContent"`
|
|
|
|
|
} `json:"item"`
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-01 23:20:11 +03:00
|
|
|
type entry struct {
|
|
|
|
|
Content struct {
|
2024-02-13 04:45:08 +03:00
|
|
|
CursorType string `json:"cursorType"`
|
|
|
|
|
Value string `json:"value"`
|
|
|
|
|
Items []item `json:"items"`
|
2023-06-01 23:20:11 +03:00
|
|
|
ItemContent struct {
|
2024-08-01 17:39:06 +03:00
|
|
|
ItemType string `json:"itemType"`
|
2023-06-05 13:49:56 +03:00
|
|
|
TweetDisplayType string `json:"tweetDisplayType"`
|
|
|
|
|
TweetResults struct {
|
2023-06-01 23:20:11 +03:00
|
|
|
Result result `json:"result"`
|
|
|
|
|
} `json:"tweet_results"`
|
2023-07-03 14:38:46 +03:00
|
|
|
UserDisplayType string `json:"userDisplayType"`
|
|
|
|
|
UserResults struct {
|
2024-02-03 14:53:11 +01:00
|
|
|
Result userResult `json:"result"`
|
2023-07-03 14:38:46 +03:00
|
|
|
} `json:"user_results"`
|
2024-08-01 17:39:06 +03:00
|
|
|
CursorType string `json:"cursorType"`
|
|
|
|
|
Value string `json:"value"`
|
2023-06-01 23:20:11 +03:00
|
|
|
} `json:"itemContent"`
|
|
|
|
|
} `json:"content"`
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// timeline v2 JSON object
|
|
|
|
|
type timelineV2 struct {
|
|
|
|
|
Data struct {
|
|
|
|
|
User struct {
|
|
|
|
|
Result struct {
|
|
|
|
|
TimelineV2 struct {
|
|
|
|
|
Timeline struct {
|
|
|
|
|
Instructions []struct {
|
2024-02-13 04:45:08 +03:00
|
|
|
ModuleItems []item `json:"moduleItems"`
|
|
|
|
|
Entries []entry `json:"entries"`
|
|
|
|
|
Entry entry `json:"entry"`
|
|
|
|
|
Type string `json:"type"`
|
2023-06-01 23:20:11 +03:00
|
|
|
} `json:"instructions"`
|
|
|
|
|
} `json:"timeline"`
|
|
|
|
|
} `json:"timeline_v2"`
|
2024-02-03 14:53:11 +01:00
|
|
|
|
|
|
|
|
Timeline struct {
|
|
|
|
|
Timeline struct {
|
|
|
|
|
Instructions []struct {
|
|
|
|
|
Entries []entry `json:"entries"`
|
|
|
|
|
Entry entry `json:"entry"`
|
|
|
|
|
Type string `json:"type"`
|
|
|
|
|
} `json:"instructions"`
|
|
|
|
|
} `json:"timeline"`
|
|
|
|
|
} `json:"timeline"`
|
2023-06-01 23:20:11 +03:00
|
|
|
} `json:"result"`
|
|
|
|
|
} `json:"user"`
|
|
|
|
|
} `json:"data"`
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (timeline *timelineV2) parseTweets() ([]*Tweet, string) {
|
|
|
|
|
var cursor string
|
|
|
|
|
var tweets []*Tweet
|
|
|
|
|
for _, instruction := range timeline.Data.User.Result.TimelineV2.Timeline.Instructions {
|
|
|
|
|
for _, entry := range instruction.Entries {
|
|
|
|
|
if entry.Content.CursorType == "Bottom" {
|
|
|
|
|
cursor = entry.Content.Value
|
|
|
|
|
continue
|
|
|
|
|
}
|
2024-04-25 23:48:41 +03:00
|
|
|
if entry.Content.ItemContent.TweetResults.Result.Typename == "Tweet" || entry.Content.ItemContent.TweetResults.Result.Typename == "TweetWithVisibilityResults" {
|
2023-06-01 23:20:11 +03:00
|
|
|
if tweet := entry.Content.ItemContent.TweetResults.Result.parse(); tweet != nil {
|
|
|
|
|
tweets = append(tweets, tweet)
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-02-13 04:45:08 +03:00
|
|
|
if len(entry.Content.Items) > 0 {
|
|
|
|
|
for _, item := range entry.Content.Items {
|
|
|
|
|
if tweet := item.Item.ItemContent.TweetResults.Result.parse(); tweet != nil {
|
|
|
|
|
tweets = append(tweets, tweet)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if len(instruction.ModuleItems) > 0 {
|
|
|
|
|
for _, entry := range instruction.ModuleItems {
|
2024-04-25 23:48:41 +03:00
|
|
|
if entry.Item.ItemContent.TweetResults.Result.Typename == "Tweet" || entry.Item.ItemContent.TweetResults.Result.Typename == "TweetWithVisibilityResults" {
|
2024-02-13 04:45:08 +03:00
|
|
|
if tweet := entry.Item.ItemContent.TweetResults.Result.parse(); tweet != nil {
|
|
|
|
|
tweets = append(tweets, tweet)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2023-06-01 23:20:11 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return tweets, cursor
|
|
|
|
|
}
|
|
|
|
|
|
2024-02-21 04:41:51 +03:00
|
|
|
type bookmarksTimelineV2 struct {
|
|
|
|
|
Data struct {
|
|
|
|
|
Bookmarks struct {
|
|
|
|
|
Timeline struct {
|
|
|
|
|
Instructions []struct {
|
|
|
|
|
Entries []entry `json:"entries"`
|
|
|
|
|
Type string `json:"type"`
|
|
|
|
|
} `json:"instructions"`
|
|
|
|
|
} `json:"timeline"`
|
|
|
|
|
} `json:"bookmark_timeline_v2"`
|
|
|
|
|
} `json:"data"`
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (timeline *bookmarksTimelineV2) parseTweets() ([]*Tweet, string) {
|
|
|
|
|
var cursor string
|
|
|
|
|
var tweets []*Tweet
|
|
|
|
|
for _, instruction := range timeline.Data.Bookmarks.Timeline.Instructions {
|
|
|
|
|
for _, entry := range instruction.Entries {
|
|
|
|
|
if entry.Content.CursorType == "Bottom" {
|
|
|
|
|
cursor = entry.Content.Value
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if entry.Content.ItemContent.TweetResults.Result.Typename == "Tweet" {
|
|
|
|
|
if tweet := entry.Content.ItemContent.TweetResults.Result.parse(); tweet != nil {
|
|
|
|
|
tweets = append(tweets, tweet)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return tweets, cursor
|
|
|
|
|
}
|
|
|
|
|
|
2024-08-05 17:57:18 +03:00
|
|
|
type retweetersTimelineV2 struct {
|
|
|
|
|
Data struct {
|
|
|
|
|
RetweetersTimeline struct {
|
|
|
|
|
Timeline struct {
|
|
|
|
|
Instructions []struct {
|
|
|
|
|
Type string `json:"type"`
|
|
|
|
|
Entries []entry `json:"entries"`
|
|
|
|
|
} `json:"instructions"`
|
|
|
|
|
} `json:"timeline"`
|
|
|
|
|
} `json:"retweeters_timeline"`
|
|
|
|
|
} `json:"data"`
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (timeline *retweetersTimelineV2) parseUsers() ([]*Profile, string) {
|
|
|
|
|
var cursor string
|
|
|
|
|
var users []*Profile
|
|
|
|
|
for _, instruction := range timeline.Data.RetweetersTimeline.Timeline.Instructions {
|
|
|
|
|
for _, entry := range instruction.Entries {
|
|
|
|
|
if entry.Content.CursorType == "Bottom" {
|
|
|
|
|
cursor = entry.Content.Value
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if entry.Content.ItemContent.UserResults.Result.Typename == "User" {
|
|
|
|
|
user := entry.Content.ItemContent.UserResults.Result.parse()
|
|
|
|
|
users = append(users, &user)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return users, cursor
|
|
|
|
|
}
|
|
|
|
|
|
2024-02-03 14:53:11 +01:00
|
|
|
func (timeline *timelineV2) parseUsers() ([]*Profile, string) {
|
|
|
|
|
var cursor string
|
|
|
|
|
var users []*Profile
|
|
|
|
|
for _, instruction := range timeline.Data.User.Result.Timeline.Timeline.Instructions {
|
|
|
|
|
for _, entry := range instruction.Entries {
|
|
|
|
|
if entry.Content.CursorType == "Bottom" {
|
|
|
|
|
cursor = entry.Content.Value
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if entry.Content.ItemContent.UserResults.Result.Typename == "User" {
|
|
|
|
|
user := entry.Content.ItemContent.UserResults.Result.parse()
|
|
|
|
|
users = append(users, &user)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return users, cursor
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-01 23:20:11 +03:00
|
|
|
type threadedConversation struct {
|
|
|
|
|
Data struct {
|
|
|
|
|
ThreadedConversationWithInjectionsV2 struct {
|
|
|
|
|
Instructions []struct {
|
2024-08-01 17:39:06 +03:00
|
|
|
Type string `json:"type"`
|
|
|
|
|
Entry entry `json:"entry"`
|
|
|
|
|
Entries []entry `json:"entries"`
|
|
|
|
|
ModuleItems []item `json:"moduleItems"`
|
2023-06-01 23:20:11 +03:00
|
|
|
} `json:"instructions"`
|
|
|
|
|
} `json:"threaded_conversation_with_injections_v2"`
|
|
|
|
|
} `json:"data"`
|
|
|
|
|
}
|
|
|
|
|
|
2024-08-01 17:39:06 +03:00
|
|
|
func (conversation *threadedConversation) parse(focalTweetID string) ([]*Tweet, []*ThreadCursor) {
|
2023-06-01 23:20:11 +03:00
|
|
|
var tweets []*Tweet
|
2024-08-01 17:39:06 +03:00
|
|
|
var cursors []*ThreadCursor
|
2023-06-01 23:20:11 +03:00
|
|
|
for _, instruction := range conversation.Data.ThreadedConversationWithInjectionsV2.Instructions {
|
|
|
|
|
for _, entry := range instruction.Entries {
|
2024-04-25 23:23:29 +03:00
|
|
|
if entry.Content.ItemContent.TweetResults.Result.Typename == "Tweet" || entry.Content.ItemContent.TweetResults.Result.Typename == "TweetWithVisibilityResults" {
|
2023-06-01 23:20:11 +03:00
|
|
|
if tweet := entry.Content.ItemContent.TweetResults.Result.parse(); tweet != nil {
|
2023-06-05 13:49:56 +03:00
|
|
|
if entry.Content.ItemContent.TweetDisplayType == "SelfThread" {
|
|
|
|
|
tweet.IsSelfThread = true
|
|
|
|
|
}
|
2023-06-01 23:20:11 +03:00
|
|
|
tweets = append(tweets, tweet)
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-08-01 17:39:06 +03:00
|
|
|
|
|
|
|
|
if entry.Content.ItemContent.CursorType != "" && entry.Content.ItemContent.Value != "" {
|
|
|
|
|
cursors = append(cursors, &ThreadCursor{
|
|
|
|
|
FocalTweetID: focalTweetID,
|
|
|
|
|
ThreadID: focalTweetID,
|
|
|
|
|
Cursor: entry.Content.ItemContent.Value,
|
|
|
|
|
CursorType: entry.Content.ItemContent.CursorType,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-01 23:20:11 +03:00
|
|
|
for _, item := range entry.Content.Items {
|
2024-04-25 23:23:29 +03:00
|
|
|
if item.Item.ItemContent.TweetResults.Result.Typename == "Tweet" || item.Item.ItemContent.TweetResults.Result.Typename == "TweetWithVisibilityResults" {
|
2023-06-01 23:20:11 +03:00
|
|
|
if tweet := item.Item.ItemContent.TweetResults.Result.parse(); tweet != nil {
|
2023-06-05 13:49:56 +03:00
|
|
|
if item.Item.ItemContent.TweetDisplayType == "SelfThread" {
|
|
|
|
|
tweet.IsSelfThread = true
|
|
|
|
|
}
|
2023-06-01 23:20:11 +03:00
|
|
|
tweets = append(tweets, tweet)
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-08-01 17:39:06 +03:00
|
|
|
|
|
|
|
|
if item.Item.ItemContent.CursorType != "" && item.Item.ItemContent.Value != "" {
|
|
|
|
|
threadID := ""
|
|
|
|
|
|
|
|
|
|
entryId := strings.Split(item.EntryID, "-")
|
|
|
|
|
if len(entryId) > 1 && entryId[0] == "conversationthread" {
|
|
|
|
|
if i, _ := strconv.Atoi(entryId[1]); i != 0 {
|
|
|
|
|
threadID = entryId[1]
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cursors = append(cursors, &ThreadCursor{
|
|
|
|
|
FocalTweetID: focalTweetID,
|
|
|
|
|
ThreadID: threadID,
|
|
|
|
|
Cursor: item.Item.ItemContent.Value,
|
|
|
|
|
CursorType: item.Item.ItemContent.CursorType,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
for _, item := range instruction.ModuleItems {
|
|
|
|
|
if item.Item.ItemContent.TweetResults.Result.Typename == "Tweet" || item.Item.ItemContent.TweetResults.Result.Typename == "TweetWithVisibilityResults" {
|
|
|
|
|
if tweet := item.Item.ItemContent.TweetResults.Result.parse(); tweet != nil {
|
|
|
|
|
if item.Item.ItemContent.TweetDisplayType == "SelfThread" {
|
|
|
|
|
tweet.IsSelfThread = true
|
|
|
|
|
}
|
|
|
|
|
tweets = append(tweets, tweet)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if item.Item.ItemContent.CursorType != "" && item.Item.ItemContent.Value != "" {
|
|
|
|
|
threadID := ""
|
|
|
|
|
|
|
|
|
|
entryId := strings.Split(item.EntryID, "-")
|
|
|
|
|
if len(entryId) > 1 && entryId[0] == "conversationthread" {
|
|
|
|
|
if i, _ := strconv.Atoi(entryId[1]); i != 0 {
|
|
|
|
|
threadID = entryId[1]
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cursors = append(cursors, &ThreadCursor{
|
|
|
|
|
FocalTweetID: focalTweetID,
|
|
|
|
|
ThreadID: threadID,
|
|
|
|
|
Cursor: item.Item.ItemContent.Value,
|
|
|
|
|
CursorType: item.Item.ItemContent.CursorType,
|
|
|
|
|
})
|
2023-06-01 23:20:11 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-08-01 17:39:06 +03:00
|
|
|
|
2023-06-01 23:20:11 +03:00
|
|
|
for _, tweet := range tweets {
|
|
|
|
|
if tweet.InReplyToStatusID != "" {
|
|
|
|
|
for _, parentTweet := range tweets {
|
|
|
|
|
if parentTweet.ID == tweet.InReplyToStatusID {
|
|
|
|
|
tweet.InReplyToStatus = parentTweet
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2023-06-05 13:49:56 +03:00
|
|
|
if tweet.IsSelfThread && tweet.ConversationID == tweet.ID {
|
|
|
|
|
for _, childTweet := range tweets {
|
|
|
|
|
if childTweet.IsSelfThread && childTweet.ID != tweet.ID {
|
|
|
|
|
tweet.Thread = append(tweet.Thread, childTweet)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if len(tweet.Thread) == 0 {
|
|
|
|
|
tweet.IsSelfThread = false
|
|
|
|
|
}
|
|
|
|
|
}
|
2023-06-01 23:20:11 +03:00
|
|
|
}
|
2024-08-01 17:39:06 +03:00
|
|
|
return tweets, cursors
|
2023-06-01 23:20:11 +03:00
|
|
|
}
|
2024-03-08 18:43:26 +03:00
|
|
|
|
|
|
|
|
type tweetResult struct {
|
|
|
|
|
Data struct {
|
|
|
|
|
TweetResult struct {
|
|
|
|
|
Result result `json:"result"`
|
|
|
|
|
} `json:"tweetResult"`
|
|
|
|
|
} `json:"data"`
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (tweetResult *tweetResult) parse() *Tweet {
|
|
|
|
|
return tweetResult.Data.TweetResult.Result.parse()
|
|
|
|
|
}
|