From 9f31f3890f033ca64c125bba13e43a556dbe5ddc Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Sun, 13 Oct 2024 14:47:05 +0300 Subject: [PATCH 1/3] feat: expand URLs for profile and tweets --- types.go | 22 +++++++++++----------- util.go | 14 ++++++++++++-- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/types.go b/types.go index 35fccd5..0118fb2 100644 --- a/types.go +++ b/types.go @@ -10,6 +10,14 @@ type ( Name string } + // Url represents a URL with display, expanded, and index data. + Url struct { + DisplayURL string `json:"display_url"` + ExpandedURL string `json:"expanded_url"` + URL string `json:"url"` + Indices []int `json:"indices"` + } + // Photo type. Photo struct { ID string @@ -105,10 +113,7 @@ type ( Type string `json:"type"` URL string `json:"url"` } `json:"media"` - URLs []struct { - ExpandedURL string `json:"expanded_url"` - URL string `json:"url"` - } `json:"urls"` + URLs []Url `json:"urls"` UserMentions []struct { IDStr string `json:"id_str"` Name string `json:"name"` @@ -195,15 +200,10 @@ type ( Description string `json:"description"` Entities struct { Description struct { - Urls []interface{} `json:"urls"` + Urls []Url `json:"urls"` } `json:"description"` URL struct { - Urls []struct { - DisplayURL string `json:"display_url"` - ExpandedURL string `json:"expanded_url"` - URL string `json:"url"` - Indices []int `json:"indices"` - } `json:"urls"` + Urls []Url `json:"urls"` } `json:"url"` } `json:"entities"` FastFollowersCount int `json:"fast_followers_count"` diff --git a/util.go b/util.go index 0a2f658..f288a13 100644 --- a/util.go +++ b/util.go @@ -157,6 +157,7 @@ func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet { if tweetID == "" { return nil } + text := expandURLs(tweet.FullText, tweet.Entities.URLs) username := user.ScreenName name := user.Name tw := &Tweet{ @@ -167,7 +168,7 @@ func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet { PermanentURL: 
fmt.Sprintf("https://twitter.com/%s/status/%s", username, tweetID), Replies: tweet.ReplyCount, Retweets: tweet.RetweetCount, - Text: tweet.FullText, + Text: text, UserID: tweet.UserIDStr, Username: username, } @@ -379,12 +380,21 @@ func parseProfile(user legacyUser) Profile { return profile } +func expandURLs(text string, urls []Url) string { + expandedText := text + for _, url := range urls { + expandedText = strings.ReplaceAll(expandedText, url.URL, url.ExpandedURL) + } + return expandedText +} + func parseProfileV2(user userResult) Profile { u := user.Legacy + description := expandURLs(u.Description, u.Entities.Description.Urls) profile := Profile{ Avatar: u.ProfileImageURLHTTPS, Banner: u.ProfileBannerURL, - Biography: u.Description, + Biography: description, FollowersCount: u.FollowersCount, FollowingCount: u.FavouritesCount, FriendsCount: u.FriendsCount, From cc1eb793d6f90555acff273fc6727aabd7177b1c Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Fri, 8 Nov 2024 21:41:52 +0200 Subject: [PATCH 2/3] feat: expand media URLs in tweet --- timeline_v2.go | 1 + types.go | 38 ++++++++++++++++++++------------------ util.go | 8 ++++++++ 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/timeline_v2.go b/timeline_v2.go index 2d7f10e..5dbfb8a 100644 --- a/timeline_v2.go +++ b/timeline_v2.go @@ -56,6 +56,7 @@ func (result *result) parse() *Tweet { if result.QuotedStatusResult.Result != nil { tw.QuotedStatus = result.QuotedStatusResult.Result.parse() } + tw.HTML = expandMediaURLs(tw.HTML, legacy.ExtendedEntities.Media) return tw } diff --git a/types.go b/types.go index 0118fb2..f18f109 100644 --- a/types.go +++ b/types.go @@ -99,6 +99,25 @@ type ( GIFs []GIF } + ExtendedMedia struct { + IDStr string `json:"id_str"` + MediaURLHttps string `json:"media_url_https"` + ExtSensitiveMediaWarning struct { + AdultContent bool `json:"adult_content"` + GraphicViolence bool `json:"graphic_violence"` + Other bool 
`json:"other"` + } `json:"ext_sensitive_media_warning"` + Type string `json:"type"` + URL string `json:"url"` + VideoInfo struct { + Variants []struct { + Type string `json:"content_type"` + Bitrate int `json:"bitrate"` + URL string `json:"url"` + } `json:"variants"` + } `json:"video_info"` + } + legacyTweet struct { ConversationIDStr string `json:"conversation_id_str"` CreatedAt string `json:"created_at"` @@ -121,24 +140,7 @@ type ( } `json:"user_mentions"` } `json:"entities"` ExtendedEntities struct { - Media []struct { - IDStr string `json:"id_str"` - MediaURLHttps string `json:"media_url_https"` - ExtSensitiveMediaWarning struct { - AdultContent bool `json:"adult_content"` - GraphicViolence bool `json:"graphic_violence"` - Other bool `json:"other"` - } `json:"ext_sensitive_media_warning"` - Type string `json:"type"` - URL string `json:"url"` - VideoInfo struct { - Variants []struct { - Type string `json:"content_type"` - Bitrate int `json:"bitrate"` - URL string `json:"url"` - } `json:"variants"` - } `json:"video_info"` - } `json:"media"` + Media []ExtendedMedia `json:"media"` } `json:"extended_entities"` IDStr string `json:"id_str"` InReplyToStatusIDStr string `json:"in_reply_to_status_id_str"` diff --git a/util.go b/util.go index f288a13..f6f74f5 100644 --- a/util.go +++ b/util.go @@ -388,6 +388,14 @@ func expandURLs(text string, urls []Url) string { return expandedText } +func expandMediaURLs(text string, extendedMediaEntities []ExtendedMedia) string { + expandedText := text + for _, entity := range extendedMediaEntities { + expandedText = strings.ReplaceAll(expandedText, entity.URL, entity.MediaURLHttps) + } + return expandedText +} + func parseProfileV2(user userResult) Profile { u := user.Legacy description := expandURLs(u.Description, u.Entities.Description.Urls) From 40066f125aac0fbc145049e8bcf9de20ce744904 Mon Sep 17 00:00:00 2001 From: thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> Date: Fri, 8 Nov 2024 21:57:56 +0200 Subject: [PATCH 
3/3] feat: single function to expand URLs --- timeline_v2.go | 3 ++- util.go | 12 ++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/timeline_v2.go b/timeline_v2.go index 5dbfb8a..d6fc82e 100644 --- a/timeline_v2.go +++ b/timeline_v2.go @@ -56,7 +56,8 @@ func (result *result) parse() *Tweet { if result.QuotedStatusResult.Result != nil { tw.QuotedStatus = result.QuotedStatusResult.Result.parse() } - tw.HTML = expandMediaURLs(tw.HTML, legacy.ExtendedEntities.Media) + tw.HTML = expandURLs(tw.HTML, legacy.Entities.URLs, legacy.ExtendedEntities.Media) + tw.Text = expandURLs(tw.Text, legacy.Entities.URLs, legacy.ExtendedEntities.Media) return tw } diff --git a/util.go b/util.go index f6f74f5..abce237 100644 --- a/util.go +++ b/util.go @@ -157,7 +157,7 @@ func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet { if tweetID == "" { return nil } - text := expandURLs(tweet.FullText, tweet.Entities.URLs) + text := expandURLs(tweet.FullText, tweet.Entities.URLs, tweet.ExtendedEntities.Media) username := user.ScreenName name := user.Name tw := &Tweet{ @@ -380,25 +380,21 @@ func parseProfile(user legacyUser) Profile { return profile } -func expandURLs(text string, urls []Url) string { +func expandURLs(text string, urls []Url, extendedMediaEntities []ExtendedMedia) string { expandedText := text for _, url := range urls { expandedText = strings.ReplaceAll(expandedText, url.URL, url.ExpandedURL) } - return expandedText -} - -func expandMediaURLs(text string, extendedMediaEntities []ExtendedMedia) string { - expandedText := text for _, entity := range extendedMediaEntities { expandedText = strings.ReplaceAll(expandedText, entity.URL, entity.MediaURLHttps) } + return expandedText } func parseProfileV2(user userResult) Profile { u := user.Legacy - description := expandURLs(u.Description, u.Entities.Description.Urls) + description := expandURLs(u.Description, u.Entities.Description.Urls, nil) profile := Profile{ Avatar: 
u.ProfileImageURLHTTPS, Banner: u.ProfileBannerURL,