Returned the HTML field of the tweet

This commit is contained in:
Alexander Sheiko 2020-12-17 10:39:45 +02:00
parent 6414a834ea
commit 29ee72026a
2 changed files with 36 additions and 0 deletions

View file

@ -13,6 +13,7 @@ type (
// Tweet type.
Tweet struct {
Hashtags []string
HTML string
ID string
IsQuoted bool
IsPin bool
@ -53,9 +54,11 @@ type (
Media []struct {
MediaURLHttps string `json:"media_url_https"`
Type string `json:"type"`
URL string `json:"url"`
} `json:"media"`
URLs []struct {
ExpandedURL string `json:"expanded_url"`
URL string `json:"url"`
} `json:"urls"`
} `json:"entities"`
ExtendedEntities struct {

33
util.go
View file

@ -4,11 +4,17 @@ import (
"context"
"fmt"
"net/http"
"regexp"
"strconv"
"strings"
"time"
)
var (
	// reHashtag matches a hashtag in tweet text so it can be turned into a link.
	//
	// The tag is "#" followed by ASCII word characters (letters, digits,
	// underscore) of which at least one must be a letter, so "#COVID19" and
	// "#go_lang" match while a purely numeric "#123" does not. The leading \B
	// rejects a "#" that directly follows a word character (e.g. "a#b"), and
	// the trailing \b makes the match cover the whole run of word characters.
	// The full match always starts with "#", which callers may trim to get the
	// bare tag. Go's \w, \b and \B are ASCII-only, so non-ASCII hashtags are
	// not recognised.
	reHashtag = regexp.MustCompile(`\B(#\w*[a-zA-Z]\w*\b)`)

	// reTwitterURL matches a t.co short link: "https://t.co/" followed by
	// exactly ten ASCII alphanumeric characters. Group 1 is the part after
	// "https:" and group 2 is the last character of the ten-character code.
	reTwitterURL = regexp.MustCompile(`https:(//t\.co/([A-Za-z0-9]){10})`)
)
func (s *Scraper) newRequest(method string, url string) (*http.Request, error) {
req, err := http.NewRequest(method, url, nil)
if err != nil {
@ -102,11 +108,13 @@ func parseTimeline(timeline *timeline) ([]*Tweet, string) {
UserID: tweet.UserIDStr,
Username: username,
}
tm, err := time.Parse(time.RubyDate, tweet.CreatedAt)
if err == nil {
tw.TimeParsed = tm
tw.Timestamp = tm.Unix()
}
if tweet.QuotedStatusIDStr != "" {
tw.IsQuoted = true
}
@ -116,12 +124,14 @@ func parseTimeline(timeline *timeline) ([]*Tweet, string) {
if tweet.RetweetedStatusIDStr != "" {
tw.IsRetweet = true
}
for _, pinned := range timeline.GlobalObjects.Users[tweet.UserIDStr].PinnedTweetIdsStr {
if tweet.ConversationIDStr == pinned {
tw.IsPin = true
break
}
}
for _, hash := range tweet.Entities.Hashtags {
tw.Hashtags = append(tw.Hashtags, hash.Text)
}
@ -148,6 +158,29 @@ func parseTimeline(timeline *timeline) ([]*Tweet, string) {
for _, url := range tweet.Entities.URLs {
tw.URLs = append(tw.URLs, url.ExpandedURL)
}
tw.HTML = tweet.FullText
tw.HTML = reHashtag.ReplaceAllStringFunc(tw.HTML, func(hashtag string) string {
return fmt.Sprintf(`<a href="https://twitter.com/hashtag/%s">%s</a>`,
strings.TrimPrefix(hashtag, "#"),
hashtag,
)
})
tw.HTML = reTwitterURL.ReplaceAllStringFunc(tw.HTML, func(tco string) string {
for _, entity := range tweet.Entities.URLs {
if tco == entity.URL {
return fmt.Sprintf(`<a href="%s">%s</a>`, entity.ExpandedURL, tco)
}
}
for _, entity := range tweet.Entities.Media {
if tco == entity.URL {
return fmt.Sprintf(`<a href="%s"><img src="%s"/></a>`, tco, entity.MediaURLHttps)
}
}
return tco
})
tw.HTML = strings.Replace(tw.HTML, "\n", "<br>", -1)
tweets[tw.ID] = tw
}