twitter-scrapper/timeline_v1.go

295 lines
8.2 KiB
Go
Raw Permalink Normal View History

2021-07-16 11:08:43 +03:00
package twitterscraper
import (
"fmt"
2023-05-18 15:16:49 -04:00
"strconv"
2021-07-16 11:08:43 +03:00
"strings"
"time"
)
// legacy timeline JSON object
type timelineV1 struct {
2021-07-16 11:08:43 +03:00
GlobalObjects struct {
Tweets map[string]legacyTweet `json:"tweets"`
Users map[string]legacyUser `json:"users"`
2021-07-16 11:08:43 +03:00
} `json:"globalObjects"`
Timeline struct {
Instructions []struct {
AddEntries struct {
Entries []struct {
Content struct {
Item struct {
Content struct {
Tweet struct {
ID string `json:"id"`
} `json:"tweet"`
User struct {
ID string `json:"id"`
} `json:"user"`
} `json:"content"`
} `json:"item"`
Operation struct {
Cursor struct {
Value string `json:"value"`
CursorType string `json:"cursorType"`
} `json:"cursor"`
} `json:"operation"`
TimelineModule struct {
Items []struct {
Item struct {
ClientEventInfo struct {
Details struct {
GuideDetails struct {
TransparentGuideDetails struct {
TrendMetadata struct {
TrendName string `json:"trendName"`
} `json:"trendMetadata"`
} `json:"transparentGuideDetails"`
} `json:"guideDetails"`
} `json:"details"`
} `json:"clientEventInfo"`
} `json:"item"`
} `json:"items"`
} `json:"timelineModule"`
} `json:"content,omitempty"`
} `json:"entries"`
} `json:"addEntries"`
PinEntry struct {
Entry struct {
Content struct {
Item struct {
Content struct {
Tweet struct {
ID string `json:"id"`
} `json:"tweet"`
} `json:"content"`
} `json:"item"`
} `json:"content"`
} `json:"entry"`
} `json:"pinEntry,omitempty"`
ReplaceEntry struct {
Entry struct {
Content struct {
Operation struct {
Cursor struct {
Value string `json:"value"`
CursorType string `json:"cursorType"`
} `json:"cursor"`
} `json:"operation"`
} `json:"content"`
} `json:"entry"`
} `json:"replaceEntry,omitempty"`
} `json:"instructions"`
} `json:"timeline"`
}
func (timeline *timelineV1) parseTweet(id string) *Tweet {
2021-07-16 11:08:43 +03:00
if tweet, ok := timeline.GlobalObjects.Tweets[id]; ok {
username := timeline.GlobalObjects.Users[tweet.UserIDStr].ScreenName
2023-05-10 05:05:07 -07:00
name := timeline.GlobalObjects.Users[tweet.UserIDStr].Name
2021-07-16 11:08:43 +03:00
tw := &Tweet{
ID: id,
ConversationID: tweet.ConversationIDStr,
Likes: tweet.FavoriteCount,
Name: name,
PermanentURL: fmt.Sprintf("https://x.com/%s/status/%s", username, id),
Replies: tweet.ReplyCount,
Retweets: tweet.RetweetCount,
Text: tweet.FullText,
UserID: tweet.UserIDStr,
Username: username,
2021-07-16 11:08:43 +03:00
}
tm, err := time.Parse(time.RubyDate, tweet.CreatedAt)
if err == nil {
tw.TimeParsed = tm
tw.Timestamp = tm.Unix()
}
if tweet.Place.ID != "" {
tw.Place = &tweet.Place
}
2021-07-16 11:08:43 +03:00
if tweet.QuotedStatusIDStr != "" {
tw.IsQuoted = true
tw.QuotedStatus = timeline.parseTweet(tweet.QuotedStatusIDStr)
tw.QuotedStatusID = tweet.QuotedStatusIDStr
2021-07-16 11:08:43 +03:00
}
if tweet.InReplyToStatusIDStr != "" {
tw.IsReply = true
tw.InReplyToStatus = timeline.parseTweet(tweet.InReplyToStatusIDStr)
tw.InReplyToStatusID = tweet.InReplyToStatusIDStr
2021-07-16 11:08:43 +03:00
}
if tweet.RetweetedStatusIDStr != "" {
tw.IsRetweet = true
tw.RetweetedStatus = timeline.parseTweet(tweet.RetweetedStatusIDStr)
tw.RetweetedStatusID = tweet.RetweetedStatusIDStr
2021-07-16 11:08:43 +03:00
}
if tweet.SelfThread.IDStr == id {
tw.IsSelfThread = true
}
2023-05-18 15:16:49 -04:00
if tweet.Views.Count != "" {
views, viewsErr := strconv.Atoi(tweet.Views.Count)
if viewsErr != nil {
views = 0
}
tw.Views = views
}
2021-07-16 11:08:43 +03:00
for _, pinned := range timeline.GlobalObjects.Users[tweet.UserIDStr].PinnedTweetIdsStr {
if tweet.IDStr == pinned {
2021-07-16 11:08:43 +03:00
tw.IsPin = true
break
}
}
for _, hash := range tweet.Entities.Hashtags {
tw.Hashtags = append(tw.Hashtags, hash.Text)
}
2022-04-09 14:41:12 +03:00
2023-05-10 05:02:21 -07:00
for _, mention := range tweet.Entities.UserMentions {
tw.Mentions = append(tw.Mentions, Mention{
ID: mention.IDStr,
Username: mention.ScreenName,
Name: mention.Name,
})
}
2022-04-09 14:41:12 +03:00
for _, media := range tweet.ExtendedEntities.Media {
2021-07-16 11:08:43 +03:00
if media.Type == "photo" {
2023-05-10 03:18:34 -07:00
photo := Photo{
ID: media.IDStr,
URL: media.MediaURLHttps,
}
tw.Photos = append(tw.Photos, photo)
2022-04-09 20:01:04 +03:00
} else if media.Type == "video" {
2021-07-16 11:08:43 +03:00
video := Video{
ID: media.IDStr,
Preview: media.MediaURLHttps,
}
2022-04-09 14:41:12 +03:00
2021-07-16 11:08:43 +03:00
maxBitrate := 0
for _, variant := range media.VideoInfo.Variants {
if variant.Bitrate > maxBitrate {
video.URL = strings.TrimSuffix(variant.URL, "?tag=10")
maxBitrate = variant.Bitrate
2021-07-16 11:08:43 +03:00
}
}
2022-03-04 20:15:29 +02:00
2021-07-16 11:08:43 +03:00
tw.Videos = append(tw.Videos, video)
}
2022-03-04 20:15:29 +02:00
if !tw.SensitiveContent {
sensitive := media.ExtSensitiveMediaWarning
tw.SensitiveContent = sensitive.AdultContent || sensitive.GraphicViolence || sensitive.Other
}
2021-07-16 11:08:43 +03:00
}
2022-03-04 20:15:29 +02:00
2021-07-16 11:08:43 +03:00
for _, url := range tweet.Entities.URLs {
tw.URLs = append(tw.URLs, url.ExpandedURL)
}
tw.HTML = tweet.FullText
tw.HTML = reHashtag.ReplaceAllStringFunc(tw.HTML, func(hashtag string) string {
return fmt.Sprintf(`<a href="https://x.com/hashtag/%s">%s</a>`,
2021-07-16 11:08:43 +03:00
strings.TrimPrefix(hashtag, "#"),
hashtag,
)
})
tw.HTML = reUsername.ReplaceAllStringFunc(tw.HTML, func(username string) string {
return fmt.Sprintf(`<a href="https://x.com/%s">%s</a>`,
2021-07-16 11:08:43 +03:00
strings.TrimPrefix(username, "@"),
username,
)
})
2022-04-09 20:42:11 +03:00
var foundedMedia []string
2021-07-16 11:08:43 +03:00
tw.HTML = reTwitterURL.ReplaceAllStringFunc(tw.HTML, func(tco string) string {
for _, entity := range tweet.Entities.URLs {
if tco == entity.URL {
return fmt.Sprintf(`<a href="%s">%s</a>`, entity.ExpandedURL, tco)
}
}
2022-04-09 20:01:04 +03:00
for _, entity := range tweet.ExtendedEntities.Media {
2021-07-16 11:08:43 +03:00
if tco == entity.URL {
2022-04-09 20:42:11 +03:00
foundedMedia = append(foundedMedia, entity.MediaURLHttps)
2021-07-16 11:08:43 +03:00
return fmt.Sprintf(`<br><a href="%s"><img src="%s"/></a>`, tco, entity.MediaURLHttps)
}
}
return tco
})
2023-05-10 03:18:34 -07:00
for _, photo := range tw.Photos {
url := photo.URL
if stringInSlice(url, foundedMedia) {
continue
}
tw.HTML += fmt.Sprintf(`<br><img src="%s"/>`, url)
}
for _, video := range tw.Videos {
url := video.Preview
2022-04-09 20:42:11 +03:00
if stringInSlice(url, foundedMedia) {
continue
}
tw.HTML += fmt.Sprintf(`<br><img src="%s"/>`, url)
}
2021-07-16 11:08:43 +03:00
tw.HTML = strings.Replace(tw.HTML, "\n", "<br>", -1)
return tw
}
return nil
}
func (timeline *timelineV1) parseTweets() ([]*Tweet, string) {
2021-07-16 11:08:43 +03:00
var cursor string
var pinnedTweet *Tweet
var orderedTweets []*Tweet
for _, instruction := range timeline.Timeline.Instructions {
if instruction.PinEntry.Entry.Content.Item.Content.Tweet.ID != "" {
if tweet := timeline.parseTweet(instruction.PinEntry.Entry.Content.Item.Content.Tweet.ID); tweet != nil {
pinnedTweet = tweet
}
}
for _, entry := range instruction.AddEntries.Entries {
if tweet := timeline.parseTweet(entry.Content.Item.Content.Tweet.ID); tweet != nil {
orderedTweets = append(orderedTweets, tweet)
}
if entry.Content.Operation.Cursor.CursorType == "Bottom" {
cursor = entry.Content.Operation.Cursor.Value
}
}
if instruction.ReplaceEntry.Entry.Content.Operation.Cursor.CursorType == "Bottom" {
cursor = instruction.ReplaceEntry.Entry.Content.Operation.Cursor.Value
}
}
if pinnedTweet != nil && len(orderedTweets) > 0 {
orderedTweets = append([]*Tweet{pinnedTweet}, orderedTweets...)
}
return orderedTweets, cursor
}
func (timeline *timelineV1) parseUsers() ([]*Profile, string) {
2021-07-16 11:08:43 +03:00
users := make(map[string]Profile)
for id, user := range timeline.GlobalObjects.Users {
users[id] = parseProfile(user)
}
var cursor string
var orderedProfiles []*Profile
for _, instruction := range timeline.Timeline.Instructions {
for _, entry := range instruction.AddEntries.Entries {
if profile, ok := users[entry.Content.Item.Content.User.ID]; ok {
orderedProfiles = append(orderedProfiles, &profile)
}
if entry.Content.Operation.Cursor.CursorType == "Bottom" {
cursor = entry.Content.Operation.Cursor.Value
}
}
if instruction.ReplaceEntry.Entry.Content.Operation.Cursor.CursorType == "Bottom" {
cursor = instruction.ReplaceEntry.Entry.Content.Operation.Cursor.Value
}
}
return orderedProfiles, cursor
}