Quick and dirty fix for GetTweets
This commit is contained in:
parent
c181d32eee
commit
69c8b035ea
3 changed files with 51 additions and 54 deletions
97
tweets.go
97
tweets.go
|
|
@ -3,7 +3,7 @@ package twitterscraper
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"regexp"
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
@ -61,8 +61,7 @@ func GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *Resul
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
|
|
||||||
query := fmt.Sprintf("(from:%s)", user)
|
tweets, err := FetchTweets(user, lastTweetID)
|
||||||
tweets, err := FetchSearchTweets(query, lastTweetID)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
channel <- &Result{Error: err}
|
channel <- &Result{Error: err}
|
||||||
return
|
return
|
||||||
|
|
@ -94,24 +93,22 @@ func GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *Resul
|
||||||
|
|
||||||
// FetchTweets gets tweets for a given user, via the Twitter frontend API.
|
// FetchTweets gets tweets for a given user, via the Twitter frontend API.
|
||||||
func FetchTweets(user string, last string) ([]*Tweet, error) {
|
func FetchTweets(user string, last string) ([]*Tweet, error) {
|
||||||
|
req, err := http.NewRequest("GET", "https://syndication.twitter.com/timeline/profile", nil)
|
||||||
req, err := newRequest(fmt.Sprintf(ajaxURL, user))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
req.Header.Set("Referer", "https://twitter.com/"+user)
|
req.Header.Set("Referer", "https://publish.twitter.com/")
|
||||||
|
|
||||||
q := req.URL.Query()
|
q := req.URL.Query()
|
||||||
q.Add("include_available_features", "1")
|
q.Add("screen_name", user)
|
||||||
q.Add("include_entities", "1")
|
q.Add("with_replies", "true")
|
||||||
q.Add("include_new_items_bar", "true")
|
|
||||||
if last != "" {
|
if last != "" {
|
||||||
q.Add("max_position", last)
|
q.Add("max_position", last)
|
||||||
}
|
}
|
||||||
req.URL.RawQuery = q.Encode()
|
req.URL.RawQuery = q.Encode()
|
||||||
|
|
||||||
htm, err := getHTMLFromJSON(req, "items_html")
|
htm, err := getHTMLFromJSON(req, "body")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
@ -132,39 +129,39 @@ func readTweetsFromHTML(htm *strings.Reader) ([]*Tweet, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
doc.Find(".stream-item").Each(func(i int, s *goquery.Selection) {
|
doc.Find(".timeline-Tweet").Each(func(i int, s *goquery.Selection) {
|
||||||
var tweet Tweet
|
var tweet Tweet
|
||||||
timeStr, ok := s.Find("._timestamp").Attr("data-time")
|
timeStr, ok := s.Find(".timeline-Tweet-metadata > a > time").Attr("datetime")
|
||||||
if ok {
|
if ok {
|
||||||
tweet.Timestamp, _ = strconv.ParseInt(timeStr, 10, 64)
|
tweet.TimeParsed, _ = time.Parse("2006-01-02T15:04:05-0700", timeStr)
|
||||||
tweet.TimeParsed = time.Unix(tweet.Timestamp, 0)
|
tweet.Timestamp = tweet.TimeParsed.Unix()
|
||||||
tweet.ID = s.AttrOr("data-item-id", "")
|
tweet.ID = s.AttrOr("data-tweet-id", "")
|
||||||
tweet.UserID = s.Find(".tweet").AttrOr("data-user-id", "")
|
//tweet.UserID = s.Find(".tweet").AttrOr("data-user-id", "")
|
||||||
tweet.Username = s.Find(".tweet").AttrOr("data-screen-name", "")
|
tweet.Username = strings.TrimPrefix(s.Find(".TweetAuthor-screenName").AttrOr("title", ""), "@")
|
||||||
tweet.PermanentURL = fmt.Sprintf("https://twitter.com/%s/status/%s", tweet.Username, tweet.ID)
|
tweet.PermanentURL = fmt.Sprintf("https://twitter.com/%s/status/%s", tweet.Username, tweet.ID)
|
||||||
tweet.Text = s.Find(".tweet-text").Text()
|
tweet.Text = s.Find(".timeline-Tweet-text").Text()
|
||||||
tweet.HTML, _ = s.Find(".tweet-text").Html()
|
tweet.HTML, _ = s.Find(".timeline-Tweet-text").Html()
|
||||||
s.Find(".js-retweet-text, .QuoteTweet").Each(func(i int, c *goquery.Selection) {
|
s.Find(".timeline-Tweet-retweetCredit").Each(func(i int, c *goquery.Selection) {
|
||||||
tweet.IsRetweet = true
|
tweet.IsRetweet = true
|
||||||
})
|
})
|
||||||
s.Find("span.js-pinned-text").Each(func(i int, c *goquery.Selection) {
|
// s.Find("span.js-pinned-text").Each(func(i int, c *goquery.Selection) {
|
||||||
tweet.IsPin = true
|
// tweet.IsPin = true
|
||||||
})
|
// })
|
||||||
s.Find(".ProfileTweet-actionCount").Each(func(i int, c *goquery.Selection) {
|
// s.Find(".ProfileTweet-actionCount").Each(func(i int, c *goquery.Selection) {
|
||||||
txt := strings.TrimSpace(c.Text())
|
// txt := strings.TrimSpace(c.Text())
|
||||||
switch {
|
// switch {
|
||||||
case strings.HasSuffix(txt, "likes"):
|
// case strings.HasSuffix(txt, "likes"):
|
||||||
l := strings.Split(txt, " ")
|
// l := strings.Split(txt, " ")
|
||||||
tweet.Likes, _ = strconv.Atoi(l[0])
|
// tweet.Likes, _ = strconv.Atoi(l[0])
|
||||||
case strings.HasSuffix(txt, "replies"):
|
// case strings.HasSuffix(txt, "replies"):
|
||||||
l := strings.Split(txt, " ")
|
// l := strings.Split(txt, " ")
|
||||||
tweet.Replies, _ = strconv.Atoi(l[0])
|
// tweet.Replies, _ = strconv.Atoi(l[0])
|
||||||
case strings.HasSuffix(txt, "retweets"):
|
// case strings.HasSuffix(txt, "retweets"):
|
||||||
l := strings.Split(txt, " ")
|
// l := strings.Split(txt, " ")
|
||||||
tweet.Retweets, _ = strconv.Atoi(l[0])
|
// tweet.Retweets, _ = strconv.Atoi(l[0])
|
||||||
}
|
// }
|
||||||
})
|
// })
|
||||||
s.Find(".twitter-hashtag").Each(func(i int, h *goquery.Selection) {
|
s.Find(".hashtag > span.PrettyLink-value").Each(func(i int, h *goquery.Selection) {
|
||||||
tweet.Hashtags = append(tweet.Hashtags, h.Text())
|
tweet.Hashtags = append(tweet.Hashtags, h.Text())
|
||||||
})
|
})
|
||||||
s.Find("a.twitter-timeline-link:not(.u-hidden)").Each(func(i int, u *goquery.Selection) {
|
s.Find("a.twitter-timeline-link:not(.u-hidden)").Each(func(i int, u *goquery.Selection) {
|
||||||
|
|
@ -172,21 +169,21 @@ func readTweetsFromHTML(htm *strings.Reader) ([]*Tweet, error) {
|
||||||
tweet.URLs = append(tweet.URLs, link)
|
tweet.URLs = append(tweet.URLs, link)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
s.Find(".AdaptiveMedia-photoContainer").Each(func(i int, p *goquery.Selection) {
|
s.Find(".NaturalImage-image").Each(func(i int, p *goquery.Selection) {
|
||||||
if link, ok := p.Attr("data-image-url"); ok {
|
if link, ok := p.Attr("data-image"); ok {
|
||||||
tweet.Photos = append(tweet.Photos, link)
|
tweet.Photos = append(tweet.Photos, link)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
s.Find(".PlayableMedia-player").Each(func(i int, v *goquery.Selection) {
|
// s.Find(".PlayableMedia-player").Each(func(i int, v *goquery.Selection) {
|
||||||
if style, ok := v.Attr("style"); ok {
|
// if style, ok := v.Attr("style"); ok {
|
||||||
if strings.Contains(style, "background") {
|
// if strings.Contains(style, "background") {
|
||||||
match := regexp.MustCompile(`https:\/\/.+\/([\w-]+)\.(?:jpg|png)`).FindStringSubmatch(style)
|
// match := regexp.MustCompile(`https:\/\/.+\/([\w-]+)\.(?:jpg|png)`).FindStringSubmatch(style)
|
||||||
if len(match) == 2 {
|
// if len(match) == 2 {
|
||||||
tweet.Videos = append(tweet.Videos, Video{ID: match[1], Preview: match[0]})
|
// tweet.Videos = append(tweet.Videos, Video{ID: match[1], Preview: match[0]})
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
})
|
// })
|
||||||
tweets = append(tweets, &tweet)
|
tweets = append(tweets, &tweet)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
|
||||||
|
|
@ -19,9 +19,9 @@ func TestGetTweets(t *testing.T) {
|
||||||
if tweet.ID == "" {
|
if tweet.ID == "" {
|
||||||
t.Error("Expected tweet ID is not empty")
|
t.Error("Expected tweet ID is not empty")
|
||||||
}
|
}
|
||||||
if tweet.UserID == "" {
|
// if tweet.UserID == "" {
|
||||||
t.Error("Expected tweet UserID is not empty")
|
// t.Error("Expected tweet UserID is not empty")
|
||||||
}
|
// }
|
||||||
if tweet.Username == "" {
|
if tweet.Username == "" {
|
||||||
t.Error("Expected tweet Username is not empty")
|
t.Error("Expected tweet Username is not empty")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
2
util.go
2
util.go
|
|
@ -41,7 +41,7 @@ func getHTMLFromJSON(req *http.Request, field string) (*strings.Reader, error) {
|
||||||
|
|
||||||
htm, ok := ajaxJSON[field].(string)
|
htm, ok := ajaxJSON[field].(string)
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, fmt.Errorf("field not found in JSON")
|
return nil, fmt.Errorf("field %s not found in JSON", field)
|
||||||
}
|
}
|
||||||
|
|
||||||
return strings.NewReader(htm), nil
|
return strings.NewReader(htm), nil
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue