Parse GIFs in the GetTweet API

I am writing an app that needs to get info about all media in a tweet and forward it to a Telegram chat.

Today animated GIFs are ignored in the response of TweetDetail, although they are there (except for the caveat mentioned below). So without this change the GIFs are not present in the twitterscraper.Tweet struct.

By analogy with the existing split between Photos and Videos, I added a GIFs field to the Tweet type.

There is one caveat that I found during testing and can't really explain: GIFs don't appear in the response unless bearerToken2 is set. I don't know what this token means; maybe it somehow identifies a desktop-browser variant of the Twitter frontend, but with this token the GIFs are present in the response.

Please note that I have never written Go code before. I am using this library via FFI to link it into my Rust codebase.
This commit is contained in:
Veetaha 2023-06-18 19:13:26 +02:00
parent 15e5f233b7
commit 9c3764f484
4 changed files with 138 additions and 23 deletions

View file

@ -261,6 +261,26 @@ func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet {
} }
tw.Videos = append(tw.Videos, video) tw.Videos = append(tw.Videos, video)
} else if media.Type == "animated_gif" {
gif := GIF{
ID: media.IDStr,
Preview: media.MediaURLHttps,
}
// Twitter's API doesn't provide a bitrate for GIFs (it's always set to zero),
// so the loop below compares with `>=` instead of `>`.
// Also, GIFs have just a single variant today. In case that changes in the future
// and there are multiple variants, we pick the one with the highest bitrate,
// should any of them have a non-zero bitrate.
maxBitrate := 0
for _, variant := range media.VideoInfo.Variants {
if variant.Bitrate >= maxBitrate {
gif.URL = variant.URL
maxBitrate = variant.Bitrate
}
}
tw.GIFs = append(tw.GIFs, gif)
} }
if !tw.SensitiveContent { if !tw.SensitiveContent {
@ -315,6 +335,13 @@ func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet {
} }
tw.HTML += fmt.Sprintf(`<br><img src="%s"/>`, url) tw.HTML += fmt.Sprintf(`<br><img src="%s"/>`, url)
} }
for _, gif := range tw.GIFs {
url := gif.Preview
if stringInSlice(url, foundedMedia) {
continue
}
tw.HTML += fmt.Sprintf(`<br><img src="%s"/>`, url)
}
tw.HTML = strings.Replace(tw.HTML, "\n", "<br>", -1) tw.HTML = strings.Replace(tw.HTML, "\n", "<br>", -1)
return tw return tw
} }

View file

@ -85,14 +85,13 @@ func (s *Scraper) FetchTweetsByUserID(userID string, maxTweetsNbr int, cursor st
// GetTweet get a single tweet by ID. // GetTweet get a single tweet by ID.
func (s *Scraper) GetTweet(id string) (*Tweet, error) { func (s *Scraper) GetTweet(id string) (*Tweet, error) {
req, err := s.newRequest("GET", "https://twitter.com/i/api/graphql/wETHelmSuBQR5r-dgUlPxg/TweetDetail") req, err := s.newRequest("GET", "https://twitter.com/i/api/graphql/VWFGPVAGkZMGRKGe3GFFnA/TweetDetail")
if err != nil { if err != nil {
return nil, err return nil, err
} }
variables := map[string]interface{}{ variables := map[string]interface{}{
"focalTweetId": id, "focalTweetId": id,
"referrer": "profile",
"with_rux_injections": false, "with_rux_injections": false,
"includePromotedContent": true, "includePromotedContent": true,
"withCommunity": true, "withCommunity": true,
@ -103,14 +102,13 @@ func (s *Scraper) GetTweet(id string) (*Tweet, error) {
} }
features := map[string]interface{}{ features := map[string]interface{}{
"rweb_lists_timeline_redesign_enabled": true, "rweb_lists_timeline_redesign_enabled": true,
"responsive_web_graphql_exclude_directive_enabled": true, "responsive_web_graphql_exclude_directive_enabled": true,
"verified_phone_label_enabled": false, "verified_phone_label_enabled": false,
"creator_subscriptions_tweet_preview_api_enabled": true, "creator_subscriptions_tweet_preview_api_enabled": true,
"responsive_web_graphql_timeline_navigation_enabled": true, "responsive_web_graphql_timeline_navigation_enabled": true,
"responsive_web_graphql_skip_user_profile_image_extensions_enabled": false, "responsive_web_graphql_skip_user_profile_image_extensions_enabled": false,
"tweetypie_unmention_optimization_enabled": true, "tweetypie_unmention_optimization_enabled": true,
"vibe_api_enabled": true,
"responsive_web_edit_tweet_api_enabled": true, "responsive_web_edit_tweet_api_enabled": true,
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": true, "graphql_is_translatable_rweb_tweet_is_translatable_enabled": true,
"view_counts_everywhere_api_enabled": true, "view_counts_everywhere_api_enabled": true,
@ -119,10 +117,8 @@ func (s *Scraper) GetTweet(id string) (*Tweet, error) {
"freedom_of_speech_not_reach_fetch_enabled": true, "freedom_of_speech_not_reach_fetch_enabled": true,
"standardized_nudges_misinfo": true, "standardized_nudges_misinfo": true,
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": false, "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": false,
"interactive_text_enabled": true,
"responsive_web_text_conversations_enabled": false,
"longform_notetweets_rich_text_read_enabled": true, "longform_notetweets_rich_text_read_enabled": true,
"longform_notetweets_inline_media_enabled": false, "longform_notetweets_inline_media_enabled": true,
"responsive_web_enhance_cards_enabled": false, "responsive_web_enhance_cards_enabled": false,
} }
@ -132,7 +128,21 @@ func (s *Scraper) GetTweet(id string) (*Tweet, error) {
req.URL.RawQuery = query.Encode() req.URL.RawQuery = query.Encode()
var conversation threadedConversation var conversation threadedConversation
// Surprisingly, if bearerToken2 is not set, then animated GIFs are not
// present in the response for tweets with a GIF + a photo like this one:
// https://twitter.com/Twitter/status/1580661436132757506
curBearerToken := s.bearerToken
if curBearerToken != bearerToken2 {
s.setBearerToken(bearerToken2)
}
err = s.RequestAPI(req, &conversation) err = s.RequestAPI(req, &conversation)
if curBearerToken != bearerToken2 {
s.setBearerToken(curBearerToken)
}
if err != nil { if err != nil {
return nil, err return nil, err
} }

View file

@ -71,8 +71,18 @@ func TestGetTweets(t *testing.T) {
} }
} }
func TestGetTweet(t *testing.T) { func assertGetTweet(t *testing.T, expectedTweet *twitterscraper.Tweet) {
sample := twitterscraper.Tweet{ scraper := twitterscraper.New()
actualTweet, err := scraper.GetTweet(expectedTweet.ID)
if err != nil {
t.Error(err)
} else if diff := cmp.Diff(expectedTweet, actualTweet, cmpOptions...); diff != "" {
t.Error("Resulting tweet does not match the sample", diff)
}
}
func TestGetTweetWithVideo(t *testing.T) {
expectedTweet := twitterscraper.Tweet{
ConversationID: "1328684389388185600", ConversationID: "1328684389388185600",
HTML: "That thing you didnt Tweet but wanted to but didnt but got so close but then were like nah. <br><br>We have a place for that now—Fleets! <br><br>Rolling out to everyone starting today. <br><a href=\"https://t.co/auQAHXZMfH\"><img src=\"https://pbs.twimg.com/amplify_video_thumb/1328684333599756289/img/cP5KwbIXbGunNSBy.jpg\"/></a>", HTML: "That thing you didnt Tweet but wanted to but didnt but got so close but then were like nah. <br><br>We have a place for that now—Fleets! <br><br>Rolling out to everyone starting today. <br><a href=\"https://t.co/auQAHXZMfH\"><img src=\"https://pbs.twimg.com/amplify_video_thumb/1328684333599756289/img/cP5KwbIXbGunNSBy.jpg\"/></a>",
ID: "1328684389388185600", ID: "1328684389388185600",
@ -90,15 +100,75 @@ func TestGetTweet(t *testing.T) {
URL: "https://video.twimg.com/amplify_video/1328684333599756289/vid/960x720/PcL8yv8KhgQ48Qpt.mp4?tag=13", URL: "https://video.twimg.com/amplify_video/1328684333599756289/vid/960x720/PcL8yv8KhgQ48Qpt.mp4?tag=13",
}}, }},
} }
scraper := twitterscraper.New() assertGetTweet(t, &expectedTweet)
tweet, err := scraper.GetTweet("1328684389388185600") }
if err != nil {
t.Error(err) func TestGetTweetWithMultiplePhotos(t *testing.T) {
} else { expectedTweet := twitterscraper.Tweet{
if diff := cmp.Diff(sample, *tweet, cmpOptions...); diff != "" { ConversationID: "1390026628957417473",
t.Error("Resulting tweet does not match the sample", diff) HTML: `no bird too tall, no crop too short<br><br>introducing bigger and better images on iOS and Android, now available to everyone <br><a href="https://t.co/2buHfhfRAx"><img src="https://pbs.twimg.com/media/E0pd2L2XEAQ_gnn.jpg"/></a><br><img src="https://pbs.twimg.com/media/E0pd2hPXoAY9-TZ.jpg"/>`,
} ID: "1390026628957417473",
Name: "Twitter",
PermanentURL: "https://twitter.com/Twitter/status/1390026628957417473",
Photos: []twitterscraper.Photo{
{ID: "1390026620472332292", URL: "https://pbs.twimg.com/media/E0pd2L2XEAQ_gnn.jpg"},
{ID: "1390026626214371334", URL: "https://pbs.twimg.com/media/E0pd2hPXoAY9-TZ.jpg"},
},
Text: "no bird too tall, no crop too short\n\nintroducing bigger and better images on iOS and Android, now available to everyone https://t.co/2buHfhfRAx",
TimeParsed: time.Date(2021, 5, 5, 19, 32, 28, 0, time.FixedZone("UTC", 0)),
Timestamp: 1620243148,
UserID: "783214",
Username: "Twitter",
} }
assertGetTweet(t, &expectedTweet)
}
// TestGetTweetWithGIF checks GetTweet against a tweet whose expected media is
// a single animated GIF. The expected Tweet has no Photos or Videos; the GIF
// must be reported in GIFs with its ID, preview image and video URL, and the
// preview image must be embedded in HTML. The comparison itself is delegated
// to assertGetTweet, which fetches the tweet by expectedTweet.ID.
func TestGetTweetWithGIF(t *testing.T) {
expectedTweet := twitterscraper.Tweet{
ConversationID: "1288540609310056450",
GIFs: []twitterscraper.GIF{
{
ID: "1288540582768517123",
Preview: "https://pbs.twimg.com/tweet_video_thumb/EeHQ1UKXoAMVxWB.jpg",
URL: "https://video.twimg.com/tweet_video/EeHQ1UKXoAMVxWB.mp4",
},
},
Hashtags: []string{"CountdownToMars"},
HTML: `Like for liftoff! <a href="https://twitter.com/hashtag/CountdownToMars">#CountdownToMars</a> <br><a href="https://t.co/yLe331pHfY"><img src="https://pbs.twimg.com/tweet_video_thumb/EeHQ1UKXoAMVxWB.jpg"/></a>`,
ID: "1288540609310056450",
Name: "Twitter",
PermanentURL: "https://twitter.com/Twitter/status/1288540609310056450",
Text: "Like for liftoff! #CountdownToMars https://t.co/yLe331pHfY",
TimeParsed: time.Date(2020, 7, 29, 18, 23, 15, 0, time.FixedZone("UTC", 0)),
Timestamp: 1596046995,
UserID: "783214",
Username: "Twitter",
}
assertGetTweet(t, &expectedTweet)
}
// TestGetTweetWithPhotoAndGIF checks GetTweet against a tweet that carries
// both a photo and an animated GIF: the photo must be reported in Photos, the
// GIF in GIFs, and HTML must contain the photo followed by the GIF's preview
// image. This is the tweet cited in GetTweet as the case where the GIF is
// missing from the response unless bearerToken2 is used.
func TestGetTweetWithPhotoAndGIF(t *testing.T) {
expectedTweet := twitterscraper.Tweet{
ConversationID: "1580661436132757506",
GIFs: []twitterscraper.GIF{
{
ID: "1580661428335382531",
Preview: "https://pbs.twimg.com/tweet_video_thumb/Fe-jMcIXkAMXK_W.jpg",
URL: "https://video.twimg.com/tweet_video/Fe-jMcIXkAMXK_W.mp4",
},
},
HTML: `a hit Tweet <br><a href="https://t.co/2C7cah4KzW"><img src="https://pbs.twimg.com/media/Fe-jMcGWQAAFWoG.jpg"/></a><br><img src="https://pbs.twimg.com/tweet_video_thumb/Fe-jMcIXkAMXK_W.jpg"/>`,
ID: "1580661436132757506",
Name: "Twitter",
PermanentURL: "https://twitter.com/Twitter/status/1580661436132757506",
Photos: []twitterscraper.Photo{{ID: "1580661428326907904", URL: "https://pbs.twimg.com/media/Fe-jMcGWQAAFWoG.jpg"}},
Text: "a hit Tweet https://t.co/2C7cah4KzW",
TimeParsed: time.Date(2022, 10, 13, 20, 47, 8, 0, time.FixedZone("UTC", 0)),
Timestamp: 1665694028,
UserID: "783214",
Username: "Twitter",
}
assertGetTweet(t, &expectedTweet)
} }
func TestTweetMentions(t *testing.T) { func TestTweetMentions(t *testing.T) {

View file

@ -23,9 +23,17 @@ type (
URL string URL string
} }
// GIF describes an animated GIF attached to a tweet.
GIF struct {
// ID is the media ID string reported for the GIF.
ID string
// Preview is the HTTPS URL of the GIF's preview (thumbnail) image.
Preview string
// URL is the URL of the GIF's video variant; when several variants exist,
// the one with the highest bitrate is selected during parsing.
URL string
}
// Tweet type. // Tweet type.
Tweet struct { Tweet struct {
ConversationID string ConversationID string
GIFs []GIF
Hashtags []string Hashtags []string
HTML string HTML string
ID string ID string