Add Trends

This commit is contained in:
Alexander Sheiko 2020-02-12 10:45:19 +02:00
parent 75d9805984
commit cea7f72d9d
5 changed files with 159 additions and 70 deletions

View file

@ -51,8 +51,27 @@ func main() {
}
```
### Get trends
```golang
package main
import (
"fmt"
twitterscraper "github.com/n0madic/twitter-scraper"
)
// main fetches the current trends and prints them to standard output.
func main() {
	// GetTrends reports failure through its error result; this example
	// simply aborts when that happens.
	current, err := twitterscraper.GetTrends()
	if err != nil {
		panic(err)
	}

	fmt.Println(current)
}
```
## Installation
```shell
go get -u github.com/n0madic/twitter-scraper
```
```

35
trends.go Normal file
View file

@ -0,0 +1,35 @@
package twitterscraper
import (
"net/http"
"github.com/PuerkitoBio/goquery"
)
const trendsURL = "https://twitter.com/i/trends"
// GetTrends returns the list of trend names found at trendsURL.
//
// It requests trendsURL, reads the HTML fragment stored in the "module_html"
// field of the JSON response and collects the value of the "data-trend-name"
// attribute from every <li> element that carries one. Any error from building
// the request, fetching the fragment or parsing the HTML is returned as is.
func GetTrends() ([]string, error) {
	request, err := http.NewRequest("GET", trendsURL, nil)
	if err != nil {
		return nil, err
	}

	fragment, err := getHTMLFromJSON(request, "module_html")
	if err != nil {
		return nil, err
	}

	document, err := goquery.NewDocumentFromReader(fragment)
	if err != nil {
		return nil, err
	}

	var names []string
	// List items without the attribute are not trends and are skipped.
	collect := func(_ int, item *goquery.Selection) {
		name, found := item.Attr("data-trend-name")
		if !found {
			return
		}
		names = append(names, name)
	}
	document.Find("li").Each(collect)

	return names, nil
}

16
trends_test.go Normal file
View file

@ -0,0 +1,16 @@
package twitterscraper
import (
"testing"
)
// TestGetTrends checks that GetTrends succeeds and yields exactly 10 trends.
//
// GetTrends is called without any stubbing, so this test performs a real HTTP
// request against trendsURL.
func TestGetTrends(t *testing.T) {
	trends, err := GetTrends()
	if err != nil {
		// Stop immediately: without a result the length check below would
		// only report a second, misleading failure.
		t.Fatal(err)
	}
	if len(trends) != 10 {
		t.Errorf("Expected 10 trends, got %d", len(trends))
	}
}

124
tweets.go
View file

@ -1,7 +1,6 @@
package twitterscraper
import (
"encoding/json"
"fmt"
"net/http"
"regexp"
@ -90,81 +89,68 @@ func FetchTweets(user string, last string) ([]*Tweet, error) {
}
req.URL.RawQuery = q.Encode()
resp, err := http.DefaultClient.Do(req)
htm, err := getHTMLFromJSON(req, "items_html")
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusOK {
ajaxJSON := make(map[string]interface{})
err = json.NewDecoder(resp.Body).Decode(&ajaxJSON)
if err != nil {
return nil, err
}
doc, err := goquery.NewDocumentFromReader(htm)
if err != nil {
return nil, err
}
doc, err := goquery.NewDocumentFromReader(strings.NewReader(ajaxJSON["items_html"].(string)))
if err != nil {
return nil, err
}
doc.Find(".stream-item").Each(func(i int, s *goquery.Selection) {
var tweet Tweet
timeStr, ok := s.Find("._timestamp").Attr("data-time")
if ok {
tweet.Timestamp, _ = strconv.ParseInt(timeStr, 10, 64)
tweet.TimeParsed = time.Unix(tweet.Timestamp, 0)
tweet.ID = s.AttrOr("data-item-id", "")
tweet.PermanentURL = fmt.Sprintf("https://twitter.com/%s/status/%s", user, tweet.ID)
tweet.Text = s.Find(".tweet-text").Text()
tweet.HTML, _ = s.Find(".tweet-text").Html()
s.Find(".js-retweet-text, .QuoteTweet").Each(func(i int, c *goquery.Selection) {
tweet.IsRetweet = true
})
s.Find(".ProfileTweet-actionCount").Each(func(i int, c *goquery.Selection) {
txt := strings.TrimSpace(c.Text())
if strings.HasSuffix(txt, "likes") {
l := strings.Split(txt, " ")
tweet.Likes, _ = strconv.Atoi(l[0])
} else if strings.HasSuffix(txt, "replies") {
l := strings.Split(txt, " ")
tweet.Replies, _ = strconv.Atoi(l[0])
} else if strings.HasSuffix(txt, "retweets") {
l := strings.Split(txt, " ")
tweet.Retweets, _ = strconv.Atoi(l[0])
}
})
s.Find(".twitter-hashtag").Each(func(i int, h *goquery.Selection) {
tweet.Hashtags = append(tweet.Hashtags, h.Text())
})
s.Find("a.twitter-timeline-link:not(.u-hidden)").Each(func(i int, u *goquery.Selection) {
if link, ok := u.Attr("data-expanded-url"); ok {
tweet.URLs = append(tweet.URLs, link)
}
})
s.Find(".AdaptiveMedia-photoContainer").Each(func(i int, p *goquery.Selection) {
if link, ok := p.Attr("data-image-url"); ok {
tweet.Photos = append(tweet.Photos, link)
}
})
s.Find(".PlayableMedia-player").Each(func(i int, v *goquery.Selection) {
if style, ok := v.Attr("style"); ok {
if strings.Contains(style, "background") {
match := regexp.MustCompile(`https:\/\/.+\/([\w-]+)\.(?:jpg|png)`).FindStringSubmatch(style)
if len(match) == 2 {
tweet.Videos = append(tweet.Videos, Video{ID: match[1], Preview: match[0]})
}
doc.Find(".stream-item").Each(func(i int, s *goquery.Selection) {
var tweet Tweet
timeStr, ok := s.Find("._timestamp").Attr("data-time")
if ok {
tweet.Timestamp, _ = strconv.ParseInt(timeStr, 10, 64)
tweet.TimeParsed = time.Unix(tweet.Timestamp, 0)
tweet.ID = s.AttrOr("data-item-id", "")
tweet.PermanentURL = fmt.Sprintf("https://twitter.com/%s/status/%s", user, tweet.ID)
tweet.Text = s.Find(".tweet-text").Text()
tweet.HTML, _ = s.Find(".tweet-text").Html()
s.Find(".js-retweet-text, .QuoteTweet").Each(func(i int, c *goquery.Selection) {
tweet.IsRetweet = true
})
s.Find(".ProfileTweet-actionCount").Each(func(i int, c *goquery.Selection) {
txt := strings.TrimSpace(c.Text())
if strings.HasSuffix(txt, "likes") {
l := strings.Split(txt, " ")
tweet.Likes, _ = strconv.Atoi(l[0])
} else if strings.HasSuffix(txt, "replies") {
l := strings.Split(txt, " ")
tweet.Replies, _ = strconv.Atoi(l[0])
} else if strings.HasSuffix(txt, "retweets") {
l := strings.Split(txt, " ")
tweet.Retweets, _ = strconv.Atoi(l[0])
}
})
s.Find(".twitter-hashtag").Each(func(i int, h *goquery.Selection) {
tweet.Hashtags = append(tweet.Hashtags, h.Text())
})
s.Find("a.twitter-timeline-link:not(.u-hidden)").Each(func(i int, u *goquery.Selection) {
if link, ok := u.Attr("data-expanded-url"); ok {
tweet.URLs = append(tweet.URLs, link)
}
})
s.Find(".AdaptiveMedia-photoContainer").Each(func(i int, p *goquery.Selection) {
if link, ok := p.Attr("data-image-url"); ok {
tweet.Photos = append(tweet.Photos, link)
}
})
s.Find(".PlayableMedia-player").Each(func(i int, v *goquery.Selection) {
if style, ok := v.Attr("style"); ok {
if strings.Contains(style, "background") {
match := regexp.MustCompile(`https:\/\/.+\/([\w-]+)\.(?:jpg|png)`).FindStringSubmatch(style)
if len(match) == 2 {
tweet.Videos = append(tweet.Videos, Video{ID: match[1], Preview: match[0]})
}
}
})
tweets = append(tweets, &tweet)
}
})
} else if resp.StatusCode == http.StatusNotFound {
return nil, fmt.Errorf("user %s not found", user)
} else {
return nil, fmt.Errorf("response status: %s", resp.Status)
}
}
})
tweets = append(tweets, &tweet)
}
})
return tweets, nil
}

33
util.go Normal file
View file

@ -0,0 +1,33 @@
package twitterscraper
import (
"encoding/json"
"fmt"
"net/http"
"strings"
)
// getHTMLFromJSON sends req with http.DefaultClient, decodes the response body
// as a JSON object and returns a reader over the string stored under field.
//
// An error is returned when the request fails, when the response status is
// not 200 OK, when the body cannot be decoded into a JSON object, or when
// field is absent from the object or does not hold a string.
func getHTMLFromJSON(req *http.Request, field string) (*strings.Reader, error) {
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("response status: %s", resp.Status)
	}

	ajaxJSON := make(map[string]interface{})
	if err := json.NewDecoder(resp.Body).Decode(&ajaxJSON); err != nil {
		return nil, err
	}

	// Report a missing key and a wrongly typed value separately so the caller
	// can tell which of the two went wrong, and always name the field.
	value, present := ajaxJSON[field]
	if !present {
		return nil, fmt.Errorf("field %q not found in JSON", field)
	}
	htm, ok := value.(string)
	if !ok {
		return nil, fmt.Errorf("field %q in JSON is not a string", field)
	}
	return strings.NewReader(htm), nil
}