Merge pull request #24 from thewh1teagle/feat/expand-description-urls

feat: expand description URLs
This commit is contained in:
Valentine 2025-01-23 03:03:43 +03:00 committed by GitHub
commit c269a9ba92
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 49 additions and 31 deletions

View file

@ -56,6 +56,8 @@ func (result *result) parse() *Tweet {
if result.QuotedStatusResult.Result != nil { if result.QuotedStatusResult.Result != nil {
tw.QuotedStatus = result.QuotedStatusResult.Result.parse() tw.QuotedStatus = result.QuotedStatusResult.Result.parse()
} }
tw.HTML = expandURLs(tw.HTML, legacy.Entities.URLs, legacy.ExtendedEntities.Media)
tw.HTML = expandURLs(tw.Text, legacy.Entities.URLs, legacy.ExtendedEntities.Media)
return tw return tw
} }

View file

@ -10,6 +10,14 @@ type (
Name string Name string
} }
// Url is a single URL entity decoded from a JSON "urls" array.
//
// It is the element type of the tweet entity list (Entities.URLs) and of the
// user entity lists (Entities.Description.Urls and Entities.URL.Urls).
// expandURLs replaces every occurrence of URL in a text with ExpandedURL.
//
// NOTE(review): Go naming would prefer "URL" over "Url", but the type is
// exported, so renaming it would break callers.
Url struct {
// DisplayURL is decoded from the "display_url" key.
// Presumably the abbreviated form meant for display — TODO confirm.
DisplayURL string `json:"display_url"`
// ExpandedURL is decoded from the "expanded_url" key; expandURLs uses it
// as the replacement text.
ExpandedURL string `json:"expanded_url"`
// URL is decoded from the "url" key; expandURLs searches the text for it.
URL string `json:"url"`
// Indices is decoded from the "indices" key.
// Presumably the start/end offsets of URL within the text — TODO confirm.
Indices []int `json:"indices"`
}
// Photo type. // Photo type.
Photo struct { Photo struct {
ID string ID string
@ -91,6 +99,25 @@ type (
GIFs []GIF GIFs []GIF
} }
// ExtendedMedia is a single media entity decoded from the "media" array of a
// tweet's "extended_entities" object.
//
// expandURLs replaces every occurrence of URL in a text with MediaURLHttps.
ExtendedMedia struct {
// IDStr is decoded from the "id_str" key.
IDStr string `json:"id_str"`
// MediaURLHttps is decoded from the "media_url_https" key; expandURLs
// uses it as the replacement text.
MediaURLHttps string `json:"media_url_https"`
// ExtSensitiveMediaWarning holds the three boolean flags decoded from the
// "ext_sensitive_media_warning" object.
ExtSensitiveMediaWarning struct {
AdultContent bool `json:"adult_content"`
GraphicViolence bool `json:"graphic_violence"`
Other bool `json:"other"`
} `json:"ext_sensitive_media_warning"`
// Type is decoded from the "type" key.
// Presumably the media kind (photo, video, GIF) — TODO confirm values.
Type string `json:"type"`
// URL is decoded from the "url" key; expandURLs searches the text for it.
URL string `json:"url"`
// VideoInfo is decoded from the "video_info" object and lists the
// available variants of the media.
VideoInfo struct {
Variants []struct {
// Type is decoded from the "content_type" key (not "type").
Type string `json:"content_type"`
// Bitrate is decoded from the "bitrate" key; unit not shown here.
Bitrate int `json:"bitrate"`
// URL is decoded from the "url" key of the variant.
URL string `json:"url"`
} `json:"variants"`
} `json:"video_info"`
}
legacyTweet struct { legacyTweet struct {
ConversationIDStr string `json:"conversation_id_str"` ConversationIDStr string `json:"conversation_id_str"`
CreatedAt string `json:"created_at"` CreatedAt string `json:"created_at"`
@ -105,10 +132,7 @@ type (
Type string `json:"type"` Type string `json:"type"`
URL string `json:"url"` URL string `json:"url"`
} `json:"media"` } `json:"media"`
URLs []struct { URLs []Url `json:"urls"`
ExpandedURL string `json:"expanded_url"`
URL string `json:"url"`
} `json:"urls"`
UserMentions []struct { UserMentions []struct {
IDStr string `json:"id_str"` IDStr string `json:"id_str"`
Name string `json:"name"` Name string `json:"name"`
@ -116,24 +140,7 @@ type (
} `json:"user_mentions"` } `json:"user_mentions"`
} `json:"entities"` } `json:"entities"`
ExtendedEntities struct { ExtendedEntities struct {
Media []struct { Media []ExtendedMedia `json:"media"`
IDStr string `json:"id_str"`
MediaURLHttps string `json:"media_url_https"`
ExtSensitiveMediaWarning struct {
AdultContent bool `json:"adult_content"`
GraphicViolence bool `json:"graphic_violence"`
Other bool `json:"other"`
} `json:"ext_sensitive_media_warning"`
Type string `json:"type"`
URL string `json:"url"`
VideoInfo struct {
Variants []struct {
Type string `json:"content_type"`
Bitrate int `json:"bitrate"`
URL string `json:"url"`
} `json:"variants"`
} `json:"video_info"`
} `json:"media"`
} `json:"extended_entities"` } `json:"extended_entities"`
IDStr string `json:"id_str"` IDStr string `json:"id_str"`
InReplyToStatusIDStr string `json:"in_reply_to_status_id_str"` InReplyToStatusIDStr string `json:"in_reply_to_status_id_str"`
@ -210,15 +217,10 @@ type (
Description string `json:"description"` Description string `json:"description"`
Entities struct { Entities struct {
Description struct { Description struct {
Urls []interface{} `json:"urls"` Urls []Url `json:"urls"`
} `json:"description"` } `json:"description"`
URL struct { URL struct {
Urls []struct { Urls []Url `json:"urls"`
DisplayURL string `json:"display_url"`
ExpandedURL string `json:"expanded_url"`
URL string `json:"url"`
Indices []int `json:"indices"`
} `json:"urls"`
} `json:"url"` } `json:"url"`
} `json:"entities"` } `json:"entities"`
FastFollowersCount int `json:"fast_followers_count"` FastFollowersCount int `json:"fast_followers_count"`

18
util.go
View file

@ -157,6 +157,7 @@ func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet {
if tweetID == "" { if tweetID == "" {
return nil return nil
} }
text := expandURLs(tweet.FullText, tweet.Entities.URLs, tweet.ExtendedEntities.Media)
username := user.ScreenName username := user.ScreenName
name := user.Name name := user.Name
tw := &Tweet{ tw := &Tweet{
@ -167,7 +168,7 @@ func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet {
PermanentURL: fmt.Sprintf("https://twitter.com/%s/status/%s", username, tweetID), PermanentURL: fmt.Sprintf("https://twitter.com/%s/status/%s", username, tweetID),
Replies: tweet.ReplyCount, Replies: tweet.ReplyCount,
Retweets: tweet.RetweetCount, Retweets: tweet.RetweetCount,
Text: tweet.FullText, Text: text,
UserID: tweet.UserIDStr, UserID: tweet.UserIDStr,
Username: username, Username: username,
} }
@ -382,12 +383,25 @@ func parseProfile(user legacyUser) Profile {
return profile return profile
} }
// expandURLs returns text with the links described by the given entities
// replaced by their expanded targets.
//
// For every entry in urls, each occurrence of the entry's URL is replaced by
// its ExpandedURL. Afterwards, for every entry in extendedMediaEntities, each
// occurrence of the entry's URL is replaced by its MediaURLHttps. Replacements
// are applied sequentially in slice order, so a later entry also sees the
// output of earlier ones.
//
// Entries with an empty URL or an empty replacement are skipped:
//   - strings.ReplaceAll with an empty search string matches before every
//     rune, which would splice the replacement between all characters;
//   - an empty replacement would silently delete the link from the text.
//
// Nil or empty slices are valid and leave text unchanged.
func expandURLs(text string, urls []Url, extendedMediaEntities []ExtendedMedia) string {
	expandedText := text
	for i := range urls {
		u := &urls[i]
		if u.URL == "" || u.ExpandedURL == "" {
			continue // incomplete entity: keep the text as it is
		}
		expandedText = strings.ReplaceAll(expandedText, u.URL, u.ExpandedURL)
	}
	// Index instead of ranging by value: ExtendedMedia is a fairly large struct.
	for i := range extendedMediaEntities {
		m := &extendedMediaEntities[i]
		if m.URL == "" || m.MediaURLHttps == "" {
			continue // incomplete entity: keep the text as it is
		}
		expandedText = strings.ReplaceAll(expandedText, m.URL, m.MediaURLHttps)
	}
	return expandedText
}
func parseProfileV2(user userResult) Profile { func parseProfileV2(user userResult) Profile {
u := user.Legacy u := user.Legacy
description := expandURLs(u.Description, u.Entities.Description.Urls, []ExtendedMedia{})
profile := Profile{ profile := Profile{
Avatar: u.ProfileImageURLHTTPS, Avatar: u.ProfileImageURLHTTPS,
Banner: u.ProfileBannerURL, Banner: u.ProfileBannerURL,
Biography: u.Description, Biography: description,
FollowersCount: u.FollowersCount, FollowersCount: u.FollowersCount,
FollowingCount: u.FavouritesCount, FollowingCount: u.FavouritesCount,
FriendsCount: u.FriendsCount, FriendsCount: u.FriendsCount,