From 9f31f3890f033ca64c125bba13e43a556dbe5ddc Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Sun, 13 Oct 2024 14:47:05 +0300 Subject: [PATCH 01/10] feat: expand URLs for profile and tweets --- types.go | 22 +++++++++++----------- util.go | 14 ++++++++++++-- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/types.go b/types.go index 35fccd5..0118fb2 100644 --- a/types.go +++ b/types.go @@ -10,6 +10,14 @@ type ( Name string } + // Url represents a URL with display, expanded, and index data. + Url struct { + DisplayURL string `json:"display_url"` + ExpandedURL string `json:"expanded_url"` + URL string `json:"url"` + Indices []int `json:"indices"` + } + // Photo type. Photo struct { ID string @@ -105,10 +113,7 @@ type ( Type string `json:"type"` URL string `json:"url"` } `json:"media"` - URLs []struct { - ExpandedURL string `json:"expanded_url"` - URL string `json:"url"` - } `json:"urls"` + URLs []Url `json:"urls"` UserMentions []struct { IDStr string `json:"id_str"` Name string `json:"name"` @@ -195,15 +200,10 @@ type ( Description string `json:"description"` Entities struct { Description struct { - Urls []interface{} `json:"urls"` + Urls []Url `json:"urls"` } `json:"description"` URL struct { - Urls []struct { - DisplayURL string `json:"display_url"` - ExpandedURL string `json:"expanded_url"` - URL string `json:"url"` - Indices []int `json:"indices"` - } `json:"urls"` + Urls []Url `json:"urls"` } `json:"url"` } `json:"entities"` FastFollowersCount int `json:"fast_followers_count"` diff --git a/util.go b/util.go index 0a2f658..f288a13 100644 --- a/util.go +++ b/util.go @@ -157,6 +157,7 @@ func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet { if tweetID == "" { return nil } + text := expandURLs(tweet.FullText, tweet.Entities.URLs) username := user.ScreenName name := user.Name tw := &Tweet{ @@ -167,7 +168,7 @@ func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet { PermanentURL: fmt.Sprintf("https://twitter.com/%s/status/%s", username, tweetID), Replies: tweet.ReplyCount, Retweets: tweet.RetweetCount, - Text: tweet.FullText, + Text: text, UserID: tweet.UserIDStr, Username: username, } @@ -379,12 +380,21 @@ func parseProfile(user legacyUser) Profile { return profile } +func expandURLs(text string, urls []Url) string { + expandedText := text + for _, url := range urls { + expandedText = strings.ReplaceAll(expandedText, url.URL, url.ExpandedURL) + } + return expandedText +} + func parseProfileV2(user userResult) Profile { u := user.Legacy + description := expandURLs(u.Description, u.Entities.Description.Urls) profile := Profile{ Avatar: u.ProfileImageURLHTTPS, Banner: u.ProfileBannerURL, - Biography: u.Description, + Biography: description, FollowersCount: u.FollowersCount, FollowingCount: u.FavouritesCount, FriendsCount: u.FriendsCount, From cc1eb793d6f90555acff273fc6727aabd7177b1c Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Fri, 8 Nov 2024 21:41:52 +0200 Subject: [PATCH 02/10] feat: expand media URLs in tweet --- timeline_v2.go | 1 + types.go | 38 ++++++++++++++++++++------------------ util.go | 8 ++++++++ 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/timeline_v2.go b/timeline_v2.go index 2d7f10e..5dbfb8a 100644 --- a/timeline_v2.go +++ b/timeline_v2.go @@ -56,6 +56,7 @@ func (result *result) parse() *Tweet { if result.QuotedStatusResult.Result != nil { tw.QuotedStatus = result.QuotedStatusResult.Result.parse() } + tw.HTML = expandMediaURLs(tw.HTML, legacy.ExtendedEntities.Media) return tw } diff --git a/types.go b/types.go index 0118fb2..f18f109 100644 --- a/types.go +++ b/types.go @@ -99,6 +99,25 @@ type ( GIFs []GIF } + ExtendedMedia struct { + IDStr string `json:"id_str"` + MediaURLHttps string `json:"media_url_https"` + ExtSensitiveMediaWarning struct { + AdultContent bool `json:"adult_content"` + GraphicViolence bool `json:"graphic_violence"` + Other bool `json:"other"` + } `json:"ext_sensitive_media_warning"` + Type string `json:"type"` + URL string `json:"url"` + VideoInfo struct { + Variants []struct { + Type string `json:"content_type"` + Bitrate int `json:"bitrate"` + URL string `json:"url"` + } `json:"variants"` + } `json:"video_info"` + } + legacyTweet struct { ConversationIDStr string `json:"conversation_id_str"` CreatedAt string `json:"created_at"` @@ -121,24 +140,7 @@ type ( } `json:"user_mentions"` } `json:"entities"` ExtendedEntities struct { - Media []struct { - IDStr string `json:"id_str"` - MediaURLHttps string `json:"media_url_https"` - ExtSensitiveMediaWarning struct { - AdultContent bool `json:"adult_content"` - GraphicViolence bool `json:"graphic_violence"` - Other bool `json:"other"` - } `json:"ext_sensitive_media_warning"` - Type string `json:"type"` - URL string `json:"url"` - VideoInfo struct { - Variants []struct { - Type string `json:"content_type"` - Bitrate int `json:"bitrate"` - URL string `json:"url"` - } `json:"variants"` - } `json:"video_info"` - } `json:"media"` + Media []ExtendedMedia `json:"media"` } `json:"extended_entities"` IDStr string `json:"id_str"` InReplyToStatusIDStr string `json:"in_reply_to_status_id_str"` diff --git a/util.go b/util.go index f288a13..f6f74f5 100644 --- a/util.go +++ b/util.go @@ -388,6 +388,14 @@ func expandURLs(text string, urls []Url) string { return expandedText } +func expandMediaURLs(text string, extendedMediaEntities []ExtendedMedia) string { + expandedText := text + for _, entity := range extendedMediaEntities { + expandedText = strings.ReplaceAll(expandedText, entity.URL, entity.MediaURLHttps) + } + return expandedText +} + func parseProfileV2(user userResult) Profile { u := user.Legacy description := expandURLs(u.Description, u.Entities.Description.Urls) From 40066f125aac0fbc145049e8bcf9de20ce744904 Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Fri, 8 Nov 2024 21:57:56 +0200 Subject: [PATCH 03/10] feat: single function to expand URLs --- timeline_v2.go | 3 ++- util.go | 12 ++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/timeline_v2.go b/timeline_v2.go index 5dbfb8a..d6fc82e 100644 --- a/timeline_v2.go +++ b/timeline_v2.go @@ -56,7 +56,8 @@ func (result *result) parse() *Tweet { if result.QuotedStatusResult.Result != nil { tw.QuotedStatus = result.QuotedStatusResult.Result.parse() } - tw.HTML = expandMediaURLs(tw.HTML, legacy.ExtendedEntities.Media) + tw.HTML = expandURLs(tw.HTML, legacy.Entities.URLs, legacy.ExtendedEntities.Media) + tw.HTML = expandURLs(tw.Text, legacy.Entities.URLs, legacy.ExtendedEntities.Media) return tw } diff --git a/util.go b/util.go index f6f74f5..abce237 100644 --- a/util.go +++ b/util.go @@ -157,7 +157,7 @@ func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet { if tweetID == "" { return nil } - text := expandURLs(tweet.FullText, tweet.Entities.URLs) + text := expandURLs(tweet.FullText, tweet.Entities.URLs, tweet.ExtendedEntities.Media) username := user.ScreenName name := user.Name tw := &Tweet{ @@ -380,25 +380,21 @@ func parseProfile(user legacyUser) Profile { return profile } -func expandURLs(text string, urls []Url) string { +func expandURLs(text string, urls []Url, extendedMediaEntities []ExtendedMedia) string { expandedText := text for _, url := range urls { expandedText = strings.ReplaceAll(expandedText, url.URL, url.ExpandedURL) } - return expandedText -} - -func expandMediaURLs(text string, extendedMediaEntities []ExtendedMedia) string { - expandedText := text for _, entity := range extendedMediaEntities { expandedText = strings.ReplaceAll(expandedText, entity.URL, entity.MediaURLHttps) } + return expandedText } func parseProfileV2(user userResult) Profile { u := user.Legacy - description := expandURLs(u.Description, u.Entities.Description.Urls) + description := expandURLs(u.Description, u.Entities.Description.Urls, []ExtendedMedia{}) profile := Profile{ Avatar: u.ProfileImageURLHTTPS, Banner: u.ProfileBannerURL, From 7226460e051d65f8a80db72c2e2f5e8abb05c7db Mon Sep 17 00:00:00 2001 From: Brendan Playford <34052452+teslashibe@users.noreply.github.com> Date: Wed, 22 Jan 2025 11:05:38 -0800 Subject: [PATCH 04/10] feat(types): extend Twitter profile data structures - Add new fields to Profile struct (IsBlueVerified, MediaCount, etc.) - Extend legacyUser struct with additional Twitter API fields - Add new types for extended profile, verification and highlights info - Update profile parsing to include new metrics --- profile.go | 53 ++++++++++++++++++--------------- timeline_v2.go | 23 ++++++++++----- types.go | 80 ++++++++++++++++++++++++++++++++++++++++---------- util.go | 41 ++++++++++++++------------ 4 files changed, 131 insertions(+), 66 deletions(-) diff --git a/profile.go b/profile.go index 3fc0661..bba0ee1 100644 --- a/profile.go +++ b/profile.go @@ -14,29 +14,36 @@ var cacheIDs sync.Map // Profile of twitter user. type Profile struct { - Avatar string - Banner string - Biography string - Birthday string - FollowersCount int - FollowingCount int - FriendsCount int - IsPrivate bool - IsVerified bool - Joined *time.Time - LikesCount int - ListedCount int - Location string - Name string - PinnedTweetIDs []string - TweetsCount int - URL string - UserID string - Username string - Website string - Sensitive bool - Following bool - FollowedBy bool + Avatar string + Banner string + Biography string + Birthday string + FollowersCount int + FollowingCount int + FriendsCount int + IsPrivate bool + IsVerified bool + IsBlueVerified bool + Joined *time.Time + LikesCount int + ListedCount int + Location string + Name string + PinnedTweetIDs []string + TweetsCount int + URL string + UserID string + Username string + Website string + Sensitive bool + Following bool + FollowedBy bool + MediaCount int + FastFollowersCount int + NormalFollowersCount int + ProfileImageShape string + HasGraduatedAccess bool + CanHighlightTweets bool } type user struct { diff --git a/timeline_v2.go b/timeline_v2.go index 2d7f10e..372941d 100644 --- a/timeline_v2.go +++ b/timeline_v2.go @@ -60,14 +60,21 @@ func (result *result) parse() *Tweet { } type userResult struct { - Typename string `json:"__typename"` - ID string `json:"id"` - RestID string `json:"rest_id"` - AffiliatesHighlightedLabel struct{} `json:"affiliates_highlighted_label"` - HasGraduatedAccess bool `json:"has_graduated_access"` - IsBlueVerified bool `json:"is_blue_verified"` - ProfileImageShape string `json:"profile_image_shape"` - Legacy legacyUserV2 `json:"legacy"` + Typename string `json:"__typename"` + ID string `json:"id"` + RestID string `json:"rest_id"` + AffiliatesHighlightedLabel struct{} `json:"affiliates_highlighted_label"` + HasGraduatedAccess bool `json:"has_graduated_access"` + IsBlueVerified bool `json:"is_blue_verified"` + ProfileImageShape string `json:"profile_image_shape"` + Legacy legacyUserV2 `json:"legacy"` + LegacyExtendedProfile legacyExtendedProfile `json:"legacy_extended_profile"` + IsProfileTranslatable bool `json:"is_profile_translatable"` + VerificationInfo verificationInfo `json:"verification_info"` + HighlightsInfo highlightsInfo `json:"highlights_info"` + UserSeedTweetCount int `json:"user_seed_tweet_count"` + PremiumGiftingEligible bool `json:"premium_gifting_eligible"` + CreatorSubscriptionsCount int `json:"creator_subscriptions_count"` } func (result *userResult) parse() Profile { diff --git a/types.go b/types.go index 35fccd5..a948b39 100644 --- a/types.go +++ b/types.go @@ -166,22 +166,37 @@ type ( } `json:"urls"` } `json:"url"` } `json:"entities"` - FavouritesCount int `json:"favourites_count"` - FollowersCount int `json:"followers_count"` - FriendsCount int `json:"friends_count"` - IDStr string `json:"id_str"` - ListedCount int `json:"listed_count"` - Name string `json:"name"` - Location string `json:"location"` - PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` - ProfileBannerURL string `json:"profile_banner_url"` - ProfileImageURLHTTPS string `json:"profile_image_url_https"` - Protected bool `json:"protected"` - ScreenName string `json:"screen_name"` - StatusesCount int `json:"statuses_count"` - Verified bool `json:"verified"` - FollowedBy bool `json:"followed_by"` - Following bool `json:"following"` + FavouritesCount int `json:"favourites_count"` + FollowersCount int `json:"followers_count"` + FriendsCount int `json:"friends_count"` + IDStr string `json:"id_str"` + ListedCount int `json:"listed_count"` + Name string `json:"name"` + Location string `json:"location"` + PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"` + ProfileBannerURL string `json:"profile_banner_url"` + ProfileImageURLHTTPS string `json:"profile_image_url_https"` + Protected bool `json:"protected"` + ScreenName string `json:"screen_name"` + StatusesCount int `json:"statuses_count"` + Verified bool `json:"verified"` + FollowedBy bool `json:"followed_by"` + Following bool `json:"following"` + CanDm bool `json:"can_dm"` + CanMediaTag bool `json:"can_media_tag"` + DefaultProfile bool `json:"default_profile"` + DefaultProfileImage bool `json:"default_profile_image"` + FastFollowersCount int `json:"fast_followers_count"` + HasCustomTimelines bool `json:"has_custom_timelines"` + IsTranslator bool `json:"is_translator"` + MediaCount int `json:"media_count"` + NeedsPhoneVerification bool `json:"needs_phone_verification"` + NormalFollowersCount int `json:"normal_followers_count"` + PossiblySensitive bool `json:"possibly_sensitive"` + ProfileInterstitialType string `json:"profile_interstitial_type"` + TranslatorType string `json:"translator_type"` + WantRetweets bool `json:"want_retweets"` + WithheldInCountries []string `json:"withheld_in_countries"` } legacyUserV2 struct { @@ -246,4 +261,37 @@ type ( fetchProfileFunc func(query string, maxProfilesNbr int, cursor string) ([]*Profile, string, error) fetchTweetFunc func(query string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) + + legacyExtendedProfile struct { + Birthdate struct { + Day int `json:"day"` + Month int `json:"month"` + Year int `json:"year"` + Visibility string `json:"visibility"` + YearVisibility string `json:"year_visibility"` + } `json:"birthdate"` + } + + verificationInfo struct { + IsIdentityVerified bool `json:"is_identity_verified"` + Reason struct { + Description struct { + Text string `json:"text"` + Entities []struct { + FromIndex int `json:"from_index"` + ToIndex int `json:"to_index"` + Ref struct { + URL string `json:"url"` + URLType string `json:"url_type"` + } `json:"ref"` + } `json:"entities"` + } `json:"description"` + VerifiedSinceMsec string `json:"verified_since_msec"` + } `json:"reason"` + } + + highlightsInfo struct { + CanHighlightTweets bool `json:"can_highlight_tweets"` + HighlightedTweets string `json:"highlighted_tweets"` + } ) diff --git a/util.go b/util.go index 0a2f658..c6a4fb8 100644 --- a/util.go +++ b/util.go @@ -345,25 +345,28 @@ func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet { func parseProfile(user legacyUser) Profile { profile := Profile{ - Avatar: user.ProfileImageURLHTTPS, - Banner: user.ProfileBannerURL, - Biography: user.Description, - FollowersCount: user.FollowersCount, - FollowingCount: user.FavouritesCount, - FriendsCount: user.FriendsCount, - IsVerified: user.Verified, - IsPrivate: user.Protected, - LikesCount: user.FavouritesCount, - ListedCount: user.ListedCount, - Location: user.Location, - Name: user.Name, - PinnedTweetIDs: user.PinnedTweetIdsStr, - TweetsCount: user.StatusesCount, - URL: "https://twitter.com/" + user.ScreenName, - UserID: user.IDStr, - Username: user.ScreenName, - FollowedBy: user.FollowedBy, - Following: user.Following, + Avatar: user.ProfileImageURLHTTPS, + Banner: user.ProfileBannerURL, + Biography: user.Description, + FollowersCount: user.FollowersCount, + FollowingCount: user.FavouritesCount, + FriendsCount: user.FriendsCount, + IsVerified: user.Verified, + IsPrivate: user.Protected, + LikesCount: user.FavouritesCount, + ListedCount: user.ListedCount, + Location: user.Location, + Name: user.Name, + PinnedTweetIDs: user.PinnedTweetIdsStr, + TweetsCount: user.StatusesCount, + URL: "https://twitter.com/" + user.ScreenName, + UserID: user.IDStr, + Username: user.ScreenName, + FollowedBy: user.FollowedBy, + Following: user.Following, + MediaCount: user.MediaCount, + FastFollowersCount: user.FastFollowersCount, + NormalFollowersCount: user.NormalFollowersCount, } tm, err := time.Parse(time.RubyDate, user.CreatedAt) From 388445d4c2e2d5efa46922772a40dfd13a9995c5 Mon Sep 17 00:00:00 2001 From: Brendan Playford <34052452+teslashibe@users.noreply.github.com> Date: Wed, 22 Jan 2025 11:14:03 -0800 Subject: [PATCH 05/10] feat: add IsBlueVerified to profile responses Add support for Twitter's blue verification status in profile responses: - Add IsBlueVerified field to user struct in GetProfile response - Update parseProfile and parseProfileV2 to include blue verification status - Add ProfileImageShape and HasGraduatedAccess fields to profile responses This change ensures the API correctly returns blue verification status for both direct profile queries and timeline responses. --- profile.go | 15 ++++++++++----- util.go | 41 ++++++++++++++++++++++------------------- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/profile.go b/profile.go index bba0ee1..e79a92c 100644 --- a/profile.go +++ b/profile.go @@ -50,9 +50,10 @@ type user struct { Data struct { User struct { Result struct { - RestID string `json:"rest_id"` - Legacy legacyUser `json:"legacy"` - Message string `json:"message"` + RestID string `json:"rest_id"` + Legacy legacyUser `json:"legacy"` + Message string `json:"message"` + IsBlueVerified bool `json:"is_blue_verified"` } `json:"result"` } `json:"user"` } `json:"data"` @@ -118,7 +119,9 @@ func (s *Scraper) GetProfile(username string) (Profile, error) { return Profile{}, fmt.Errorf("either @%s does not exist or is private", username) } - return parseProfile(jsn.Data.User.Result.Legacy), nil + profile := parseProfile(jsn.Data.User.Result.Legacy) + profile.IsBlueVerified = jsn.Data.User.Result.IsBlueVerified + return profile, nil } func (s *Scraper) GetProfileByID(userID string) (Profile, error) { @@ -175,7 +178,9 @@ func (s *Scraper) GetProfileByID(userID string) (Profile, error) { return Profile{}, fmt.Errorf("either @%s does not exist or is private", userID) } - return parseProfile(jsn.Data.User.Result.Legacy), nil + profile := parseProfile(jsn.Data.User.Result.Legacy) + profile.IsBlueVerified = jsn.Data.User.Result.IsBlueVerified + return profile, nil } // GetUserIDByScreenName from API diff --git a/util.go b/util.go index c6a4fb8..26264e0 100644 --- a/util.go +++ b/util.go @@ -385,25 +385,28 @@ func parseProfile(user legacyUser) Profile { func parseProfileV2(user userResult) Profile { u := user.Legacy profile := Profile{ - Avatar: u.ProfileImageURLHTTPS, - Banner: u.ProfileBannerURL, - Biography: u.Description, - FollowersCount: u.FollowersCount, - FollowingCount: u.FavouritesCount, - FriendsCount: u.FriendsCount, - IsVerified: u.Verified, - LikesCount: u.FavouritesCount, - ListedCount: u.ListedCount, - Location: u.Location, - Name: u.Name, - PinnedTweetIDs: u.PinnedTweetIdsStr, - TweetsCount: u.StatusesCount, - URL: "https://twitter.com/" + u.ScreenName, - UserID: user.ID, - Username: u.ScreenName, - Sensitive: u.PossiblySensitive, - Following: u.Following, - FollowedBy: u.FollowedBy, + Avatar: u.ProfileImageURLHTTPS, + Banner: u.ProfileBannerURL, + Biography: u.Description, + FollowersCount: u.FollowersCount, + FollowingCount: u.FavouritesCount, + FriendsCount: u.FriendsCount, + IsVerified: u.Verified, + IsBlueVerified: user.IsBlueVerified, + ProfileImageShape: user.ProfileImageShape, + HasGraduatedAccess: user.HasGraduatedAccess, + LikesCount: u.FavouritesCount, + ListedCount: u.ListedCount, + Location: u.Location, + Name: u.Name, + PinnedTweetIDs: u.PinnedTweetIdsStr, + TweetsCount: u.StatusesCount, + URL: "https://twitter.com/" + u.ScreenName, + UserID: user.ID, + Username: u.ScreenName, + Sensitive: u.PossiblySensitive, + Following: u.Following, + FollowedBy: u.FollowedBy, } tm, err := time.Parse(time.RubyDate, u.CreatedAt) From 4276e243ac1c4631354f26800aceccebb97ccf86 Mon Sep 17 00:00:00 2001 From: Valentine Date: Wed, 22 Jan 2025 23:57:00 +0300 Subject: [PATCH 06/10] fix test fail --- profile_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/profile_test.go b/profile_test.go index eb82891..6b7b940 100644 --- a/profile_test.go +++ b/profile_test.go @@ -42,6 +42,8 @@ func TestGetProfile(t *testing.T) { cmpopts.IgnoreFields(twitterscraper.Profile{}, "LikesCount"), cmpopts.IgnoreFields(twitterscraper.Profile{}, "ListedCount"), cmpopts.IgnoreFields(twitterscraper.Profile{}, "TweetsCount"), + cmpopts.IgnoreFields(twitterscraper.Profile{}, "MediaCount"), + cmpopts.IgnoreFields(twitterscraper.Profile{}, "NormalFollowersCount"), } if diff := cmp.Diff(sample, profile, cmpOptions...); diff != "" { t.Error("Resulting profile does not match the sample", diff) @@ -94,6 +96,8 @@ func TestGetProfilePrivate(t *testing.T) { cmpopts.IgnoreFields(twitterscraper.Profile{}, "LikesCount"), cmpopts.IgnoreFields(twitterscraper.Profile{}, "ListedCount"), cmpopts.IgnoreFields(twitterscraper.Profile{}, "TweetsCount"), + cmpopts.IgnoreFields(twitterscraper.Profile{}, "MediaCount"), + cmpopts.IgnoreFields(twitterscraper.Profile{}, "NormalFollowersCount"), } if diff := cmp.Diff(sample, profile, cmpOptions...); diff != "" { t.Error("Resulting profile does not match the sample", diff) From 44183226af2de76c0e90890ecaa89f6a104070cf Mon Sep 17 00:00:00 2001 From: Valentine Date: Thu, 23 Jan 2025 00:00:32 +0300 Subject: [PATCH 07/10] update twitter account in test --- profile_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profile_test.go b/profile_test.go index 6b7b940..7e9a766 100644 --- a/profile_test.go +++ b/profile_test.go @@ -150,7 +150,7 @@ func TestGetProfileByID(t *testing.T) { } func TestGetUserIDByScreenName(t *testing.T) { - userID, err := testScraper.GetUserIDByScreenName("Twitter") + userID, err := testScraper.GetUserIDByScreenName("X") if err != nil { t.Errorf("getUserByScreenName() error = %v", err) } From 3684e678e39847626678c5d58f4df7c352ddb740 Mon Sep 17 00:00:00 2001 From: Valentine Date: Thu, 23 Jan 2025 03:13:18 +0300 Subject: [PATCH 08/10] update tests --- timeline_v2.go | 2 -- tweets_test.go | 16 ++++++++-------- util.go | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/timeline_v2.go b/timeline_v2.go index 3246b03..372941d 100644 --- a/timeline_v2.go +++ b/timeline_v2.go @@ -56,8 +56,6 @@ func (result *result) parse() *Tweet { if result.QuotedStatusResult.Result != nil { tw.QuotedStatus = result.QuotedStatusResult.Result.parse() } - tw.HTML = expandURLs(tw.HTML, legacy.Entities.URLs, legacy.ExtendedEntities.Media) - tw.HTML = expandURLs(tw.Text, legacy.Entities.URLs, legacy.ExtendedEntities.Media) return tw } diff --git a/tweets_test.go b/tweets_test.go index 3b916f4..aec02e9 100644 --- a/tweets_test.go +++ b/tweets_test.go @@ -92,7 +92,7 @@ func TestGetTweetWithVideo(t *testing.T) { Name: "X", PermanentURL: "https://twitter.com/X/status/1697304622749086011", Photos: nil, - Text: "on iOS & Android, you can now swipe to reply when you slide into their DMs https://t.co/evuWpMfBxQ", + Text: "on iOS & Android, you can now swipe to reply when you slide into their DMs https://pbs.twimg.com/amplify_video_thumb/1697304568550330368/img/BUlESpef6FmWV_j2.jpg", Timestamp: 1693503931, UserID: "783214", Username: "X", @@ -125,7 +125,7 @@ func TestGetTweetWithMultiplePhotos(t *testing.T) { URL: "https://pbs.twimg.com/media/FeUJKuxXEAAa6t7.jpg", }, }, - Text: "More ways to discover videos on Twitter are here!\n\nNow on iOS, videos on your timeline will open in our full screen immersive video player, where you can swipe up to keep discovering more content. https://t.co/XI2vM8DKXA", + Text: "More ways to discover videos on Twitter are here!\n\nNow on iOS, videos on your timeline will open in our full screen immersive video player, where you can swipe up to keep discovering more content. https://pbs.twimg.com/media/FeUJKdnXEAEFe2j.jpg", Timestamp: 1664982561, UserID: "17874544", Username: "Support", @@ -147,7 +147,7 @@ func TestGetTweetWithGIF(t *testing.T) { ID: "1517535384833605632", Name: "Support", PermanentURL: "https://twitter.com/Support/status/1517535384833605632", - Text: "Video captions or no captions, it’s now easier to choose for some of you on iOS, and soon on Android.\n\nOn videos that have captions available, we’re testing the option to turn captions off/on with a new “CC” button. https://t.co/Q2Q2Wmr78U", + Text: "Video captions or no captions, it’s now easier to choose for some of you on iOS, and soon on Android.\n\nOn videos that have captions available, we’re testing the option to turn captions off/on with a new “CC” button. https://pbs.twimg.com/tweet_video_thumb/FQ9eXEhXEAA-haj.jpg", Timestamp: 1650643604, UserID: "17874544", Username: "Support", @@ -170,7 +170,7 @@ func TestGetTweetWithPhotoAndGIF(t *testing.T) { Name: "Spaces", PermanentURL: "https://twitter.com/XSpaces/status/1583186305722507265", Photos: []twitterscraper.Photo{{ID: "1583186295626539020", URL: "https://pbs.twimg.com/media/FfibjDwWIAwvbtJ.jpg"}}, - Text: "“we need to talk” \n\nirl vs on Spaces https://t.co/hrflPpbpif", + Text: "“we need to talk” \n\nirl vs on Spaces https://pbs.twimg.com/tweet_video_thumb/FfibjDnWIBIt5fn.jpg", Timestamp: 1666296004, UserID: "1065249714214457345", Username: "XSpaces", @@ -197,7 +197,7 @@ func TestTweetMentions(t *testing.T) { func TestQuotedAndReply(t *testing.T) { sample := &twitterscraper.Tweet{ ConversationID: "1237110546383724547", - HTML: "The Easiest Problem Everyone Gets Wrong

[new video] --> https://t.co/YdaeDYmPAU
", + HTML: "The Easiest Problem Everyone Gets Wrong

[new video] --> https://youtu.be/ytfCdqWhmdg
", ID: "1237110546383724547", Likes: 485, Name: "Vsauce2", @@ -208,7 +208,7 @@ func TestQuotedAndReply(t *testing.T) { }}, Replies: 12, Retweets: 18, - Text: "The Easiest Problem Everyone Gets Wrong \n\n[new video] --> https://t.co/YdaeDYmPAU https://t.co/iKu4Xs6o2V", + Text: "The Easiest Problem Everyone Gets Wrong \n\n[new video] --> https://youtu.be/ytfCdqWhmdg https://pbs.twimg.com/media/ESsZa9AXgAIAYnF.jpg", Timestamp: 1583785113, URLs: []string{"https://youtu.be/ytfCdqWhmdg"}, UserID: "978944851", @@ -241,13 +241,13 @@ func TestQuotedAndReply(t *testing.T) { func TestRetweet(t *testing.T) { sample := &twitterscraper.Tweet{ ConversationID: "1758837061786779942", - HTML: "no ads, just bangers

aka your For You feed with Premium+

subscribe here → https://t.co/APTO1t7kMk", + HTML: "no ads, just bangers

aka your For You feed with Premium+

subscribe here → https://x.com/i/premium_sign_up", ID: "1758837061786779942", URLs: []string{"https://x.com/i/premium_sign_up"}, IsSelfThread: false, Name: "Premium", PermanentURL: "https://twitter.com/premium/status/1758837061786779942", - Text: "no ads, just bangers\n\naka your For You feed with Premium+\n\nsubscribe here → https://t.co/APTO1t7kMk", + Text: "no ads, just bangers\n\naka your For You feed with Premium+\n\nsubscribe here → https://x.com/i/premium_sign_up", Timestamp: 1708174407, UserID: "1399766153053061121", Username: "premium", diff --git a/util.go b/util.go index e7e5311..525065c 100644 --- a/util.go +++ b/util.go @@ -308,7 +308,7 @@ func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet { tw.HTML = reTwitterURL.ReplaceAllStringFunc(tw.HTML, func(tco string) string { for _, entity := range tweet.Entities.URLs { if tco == entity.URL { - return fmt.Sprintf(`%s`, entity.ExpandedURL, tco) + return fmt.Sprintf(`%s`, entity.ExpandedURL, entity.ExpandedURL) } } for _, entity := range tweet.ExtendedEntities.Media { From a48dcf2587ccd9246e8ec10f9628871d10567359 Mon Sep 17 00:00:00 2001 From: Valentine Date: Fri, 28 Mar 2025 14:14:16 +0300 Subject: [PATCH 09/10] parse videos from cards --- timeline_v2.go | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/timeline_v2.go b/timeline_v2.go index 372941d..324b0b5 100644 --- a/timeline_v2.go +++ b/timeline_v2.go @@ -1,6 +1,7 @@ package twitterscraper import ( + "encoding/json" "strconv" "strings" ) @@ -28,6 +29,17 @@ type tweet struct { Result *result `json:"result"` } `json:"quoted_status_result"` Legacy legacyTweet `json:"legacy"` + Card struct { + RestID string `json:"rest_id"` + Legacy struct { + BindingValues []struct { + Key string `json:"key"` + Value struct { + StringValue string `json:"string_value"` + } `json:"value"` + } `json:"binding_values"` + } `json:"legacy"` + } `json:"card"` } type result struct { @@ -36,6 +48,31 @@ type result struct { Tweet tweet `json:"tweet"` } +type UnifiedCard struct { + Type string `json:"type"` + Components []string `json:"components"` + MediaEntities map[string]struct { + ID int64 `json:"id"` + IDStr string `json:"id_str"` + MediaURLHTTPS string `json:"media_url_https"` + Type string `json:"type"` + OriginalInfo struct { + Width int `json:"width"` + Height int `json:"height"` + } `json:"original_info"` + SourceUserID int64 `json:"source_user_id"` + VideoInfo struct { + AspectRatio []int `json:"aspect_ratio"` + DurationMillis int `json:"duration_millis"` + Variants []struct { + Bitrate int `json:"bitrate,omitempty"` + ContentType string `json:"content_type"` + URL string `json:"url"` + } `json:"variants"` + } `json:"video_info"` + } `json:"media_entities"` +} + func (result *result) parse() *Tweet { if result.NoteTweet.NoteTweetResults.Result.Text != "" { result.Legacy.FullText = result.NoteTweet.NoteTweetResults.Result.Text @@ -56,6 +93,43 @@ func (result *result) parse() *Tweet { if result.QuotedStatusResult.Result != nil { tw.QuotedStatus = result.QuotedStatusResult.Result.parse() } + + // Get videos from cards + for _, v := range result.Tweet.Card.Legacy.BindingValues { + if v.Key == "unified_card" { + var card UnifiedCard + err := json.Unmarshal([]byte(v.Value.StringValue), &card) + if err != nil { + continue + } + + for _, media := range card.MediaEntities { + if media.Type == "video" { + var vid Video + + vid.ID = media.IDStr + vid.Preview = media.MediaURLHTTPS + + var bitrate int + for _, variant := range media.VideoInfo.Variants { + if variant.ContentType == "video/mp4" { + if variant.Bitrate > bitrate { + bitrate = variant.Bitrate + vid.URL = variant.URL + } + } else if variant.ContentType == "application/x-mpegURL" { + vid.HLSURL = variant.URL + } + } + + if vid.URL != "" { + tw.Videos = append(tw.Videos, vid) + } + } + } + } + } + return tw } From d31ee7d09fcf044d6288e2986602e9d992b46390 Mon Sep 17 00:00:00 2001 From: imperatrona Date: Wed, 30 Apr 2025 01:03:40 +0300 Subject: [PATCH 10/10] rm bearerToken2 --- auth.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/auth.go b/auth.go index d0a1727..b241555 100644 --- a/auth.go +++ b/auth.go @@ -24,8 +24,9 @@ const ( oAuthURL = "https://api.twitter.com/oauth2/token" // Doesn't require x-client-transaction-id header in auth. x-rate-limit-limit: 2000 bearerToken1 = "AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF" - // Requires x-client-transaction-id header in auth. - bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" + // HOTFIX: Returns 404 error; Requires x-client-transaction-id header in auth. + // bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" + bearerToken2 = "AAAAAAAAAAAAAAAAAAAAAFQODgEAAAAAVHTp76lzh3rFzcHbmHVvQxYYpTw%3DckAlMINMjmCwxUcaXbAN4XqJVdgMJaHqNOFgPMK0zN1qLqLQCF" appConsumerKey = "3nVuSoBZnx6U4vzUxf5w" appConsumerSecret = "Bcs59EFbbsdF6Sl9Ng71smgStWEGwXXKSjYvPVt7qys" )