diff --git a/types.go b/types.go
index 13f5377..3ff206f 100644
--- a/types.go
+++ b/types.go
@@ -13,6 +13,7 @@ type (
// Tweet type.
Tweet struct {
Hashtags []string
+ HTML string
ID string
IsQuoted bool
IsPin bool
@@ -53,9 +54,11 @@ type (
Media []struct {
MediaURLHttps string `json:"media_url_https"`
Type string `json:"type"`
+ URL string `json:"url"`
} `json:"media"`
URLs []struct {
ExpandedURL string `json:"expanded_url"`
+ URL string `json:"url"`
} `json:"urls"`
} `json:"entities"`
ExtendedEntities struct {
diff --git a/util.go b/util.go
index f3c7f82..82f93d5 100644
--- a/util.go
+++ b/util.go
@@ -4,11 +4,17 @@ import (
"context"
"fmt"
"net/http"
+ "regexp"
"strconv"
"strings"
"time"
)
+var ( // patterns compiled once at package scope; parseTimeline applies both to tweet text
+ reHashtag = regexp.MustCompile(`\B(\#[a-zA-Z]+\b)`) // "#" plus one or more ASCII letters, not directly preceded by a word character and ending at a word boundary (a tag continuing with a digit or underscore does not match)
+ reTwitterURL = regexp.MustCompile(`https:(\/\/t\.co\/([A-Za-z0-9]|[A-Za-z]){10})`) // "https://t.co/" plus exactly 10 ASCII alphanumerics; NOTE(review): the [A-Za-z] alternative is already covered by [A-Za-z0-9]
+)
+
func (s *Scraper) newRequest(method string, url string) (*http.Request, error) {
req, err := http.NewRequest(method, url, nil)
if err != nil {
@@ -102,11 +108,13 @@ func parseTimeline(timeline *timeline) ([]*Tweet, string) {
UserID: tweet.UserIDStr,
Username: username,
}
+
tm, err := time.Parse(time.RubyDate, tweet.CreatedAt)
if err == nil {
tw.TimeParsed = tm
tw.Timestamp = tm.Unix()
}
+
if tweet.QuotedStatusIDStr != "" {
tw.IsQuoted = true
}
@@ -116,12 +124,14 @@ func parseTimeline(timeline *timeline) ([]*Tweet, string) {
if tweet.RetweetedStatusIDStr != "" {
tw.IsRetweet = true
}
+
for _, pinned := range timeline.GlobalObjects.Users[tweet.UserIDStr].PinnedTweetIdsStr {
if tweet.ConversationIDStr == pinned {
tw.IsPin = true
break
}
}
+
for _, hash := range tweet.Entities.Hashtags {
tw.Hashtags = append(tw.Hashtags, hash.Text)
}
@@ -148,6 +158,29 @@ func parseTimeline(timeline *timeline) ([]*Tweet, string) {
for _, url := range tweet.Entities.URLs {
tw.URLs = append(tw.URLs, url.ExpandedURL)
}
+
+		tw.HTML = tweet.FullText // start from the tweet's full text and rewrite it step by step into HTML
+		tw.HTML = reHashtag.ReplaceAllStringFunc(tw.HTML, func(hashtag string) string { // wrap every matched #hashtag in a link
+			return fmt.Sprintf(`<a href="https://twitter.com/hashtag/%s">%s</a>`, // NOTE(review): link target reconstructed after the markup was lost; confirm the URL prefix
+				strings.TrimPrefix(hashtag, "#"),
+				hashtag,
+			)
+		})
+		tw.HTML = reTwitterURL.ReplaceAllStringFunc(tw.HTML, func(tco string) string { // resolve each t.co short link through the tweet's entities
+			for _, entity := range tweet.Entities.URLs {
+				if tco == entity.URL {
+					return fmt.Sprintf(`<a href="%s">%s</a>`, entity.ExpandedURL, tco) // plain link: href is the expanded URL, text stays the short link
+				}
+			}
+			for _, entity := range tweet.Entities.Media {
+				if tco == entity.URL {
+					return fmt.Sprintf(`<br><a href="%s"><img src="%s"/></a>`, tco, entity.MediaURLHttps) // media link: show the image, linked to the short URL
+				}
+			}
+			return tco // no entity matches this short link: leave it unchanged
+		})
+		tw.HTML = strings.Replace(tw.HTML, "\n", "<br>", -1) // render every line break as <br>
+
tweets[tw.ID] = tw
}