Add Trends

This commit is contained in:
Alexander Sheiko 2020-02-12 10:45:19 +02:00
parent 75d9805984
commit cea7f72d9d
5 changed files with 159 additions and 70 deletions

View file

@ -51,8 +51,27 @@ func main() {
} }
``` ```
### Get trends
```golang
package main
import (
"fmt"
twitterscraper "github.com/n0madic/twitter-scraper"
)
func main() {
// Fetch the list of trends; this example simply panics on any error.
trends, err := twitterscraper.GetTrends()
if err != nil {
panic(err)
}
// Print the returned slice of trend names.
fmt.Println(trends)
}
```
## Installation ## Installation
```shell ```shell
go get -u github.com/n0madic/twitter-scraper go get -u github.com/n0madic/twitter-scraper
``` ```

35
trends.go Normal file
View file

@ -0,0 +1,35 @@
package twitterscraper
import (
"net/http"
"github.com/PuerkitoBio/goquery"
)
const trendsURL = "https://twitter.com/i/trends"
// GetTrends returns the list of trend names.
//
// It requests trendsURL, takes the "module_html" fragment out of the JSON
// response via getHTMLFromJSON, parses that fragment as HTML and collects the
// data-trend-name attribute of every <li> element that carries one.
//
// Any failure while building the request, fetching the fragment or parsing
// the HTML is returned unchanged together with a nil slice.
func GetTrends() ([]string, error) {
	request, err := http.NewRequest(http.MethodGet, trendsURL, nil)
	if err != nil {
		return nil, err
	}

	fragment, err := getHTMLFromJSON(request, "module_html")
	if err != nil {
		return nil, err
	}

	document, err := goquery.NewDocumentFromReader(fragment)
	if err != nil {
		return nil, err
	}

	var names []string
	document.Find("li").Each(func(_ int, item *goquery.Selection) {
		name, found := item.Attr("data-trend-name")
		if !found {
			// List items without the attribute are not trends; skip them.
			return
		}
		names = append(names, name)
	})
	return names, nil
}

16
trends_test.go Normal file
View file

@ -0,0 +1,16 @@
package twitterscraper
import (
"testing"
)
// TestGetTrends checks that GetTrends succeeds and yields exactly 10 trends.
//
// GetTrends is called directly with no stubbing, so the outcome depends on
// whatever GetTrends itself reaches out to.
func TestGetTrends(t *testing.T) {
	trends, err := GetTrends()
	if err != nil {
		// Abort immediately: checking the length of a nil result would only
		// add a second, misleading failure on top of the real error.
		t.Fatal(err)
	}
	if len(trends) != 10 {
		t.Errorf("Expected 10 trends, got %d", len(trends))
	}
}

124
tweets.go
View file

@ -1,7 +1,6 @@
package twitterscraper package twitterscraper
import ( import (
"encoding/json"
"fmt" "fmt"
"net/http" "net/http"
"regexp" "regexp"
@ -90,81 +89,68 @@ func FetchTweets(user string, last string) ([]*Tweet, error) {
} }
req.URL.RawQuery = q.Encode() req.URL.RawQuery = q.Encode()
resp, err := http.DefaultClient.Do(req) htm, err := getHTMLFromJSON(req, "items_html")
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer resp.Body.Close()
if resp.StatusCode == http.StatusOK { doc, err := goquery.NewDocumentFromReader(htm)
ajaxJSON := make(map[string]interface{}) if err != nil {
err = json.NewDecoder(resp.Body).Decode(&ajaxJSON) return nil, err
if err != nil { }
return nil, err
}
doc, err := goquery.NewDocumentFromReader(strings.NewReader(ajaxJSON["items_html"].(string))) doc.Find(".stream-item").Each(func(i int, s *goquery.Selection) {
if err != nil { var tweet Tweet
return nil, err timeStr, ok := s.Find("._timestamp").Attr("data-time")
} if ok {
tweet.Timestamp, _ = strconv.ParseInt(timeStr, 10, 64)
doc.Find(".stream-item").Each(func(i int, s *goquery.Selection) { tweet.TimeParsed = time.Unix(tweet.Timestamp, 0)
var tweet Tweet tweet.ID = s.AttrOr("data-item-id", "")
timeStr, ok := s.Find("._timestamp").Attr("data-time") tweet.PermanentURL = fmt.Sprintf("https://twitter.com/%s/status/%s", user, tweet.ID)
if ok { tweet.Text = s.Find(".tweet-text").Text()
tweet.Timestamp, _ = strconv.ParseInt(timeStr, 10, 64) tweet.HTML, _ = s.Find(".tweet-text").Html()
tweet.TimeParsed = time.Unix(tweet.Timestamp, 0) s.Find(".js-retweet-text, .QuoteTweet").Each(func(i int, c *goquery.Selection) {
tweet.ID = s.AttrOr("data-item-id", "") tweet.IsRetweet = true
tweet.PermanentURL = fmt.Sprintf("https://twitter.com/%s/status/%s", user, tweet.ID) })
tweet.Text = s.Find(".tweet-text").Text() s.Find(".ProfileTweet-actionCount").Each(func(i int, c *goquery.Selection) {
tweet.HTML, _ = s.Find(".tweet-text").Html() txt := strings.TrimSpace(c.Text())
s.Find(".js-retweet-text, .QuoteTweet").Each(func(i int, c *goquery.Selection) { if strings.HasSuffix(txt, "likes") {
tweet.IsRetweet = true l := strings.Split(txt, " ")
}) tweet.Likes, _ = strconv.Atoi(l[0])
s.Find(".ProfileTweet-actionCount").Each(func(i int, c *goquery.Selection) { } else if strings.HasSuffix(txt, "replies") {
txt := strings.TrimSpace(c.Text()) l := strings.Split(txt, " ")
if strings.HasSuffix(txt, "likes") { tweet.Replies, _ = strconv.Atoi(l[0])
l := strings.Split(txt, " ") } else if strings.HasSuffix(txt, "retweets") {
tweet.Likes, _ = strconv.Atoi(l[0]) l := strings.Split(txt, " ")
} else if strings.HasSuffix(txt, "replies") { tweet.Retweets, _ = strconv.Atoi(l[0])
l := strings.Split(txt, " ") }
tweet.Replies, _ = strconv.Atoi(l[0]) })
} else if strings.HasSuffix(txt, "retweets") { s.Find(".twitter-hashtag").Each(func(i int, h *goquery.Selection) {
l := strings.Split(txt, " ") tweet.Hashtags = append(tweet.Hashtags, h.Text())
tweet.Retweets, _ = strconv.Atoi(l[0]) })
} s.Find("a.twitter-timeline-link:not(.u-hidden)").Each(func(i int, u *goquery.Selection) {
}) if link, ok := u.Attr("data-expanded-url"); ok {
s.Find(".twitter-hashtag").Each(func(i int, h *goquery.Selection) { tweet.URLs = append(tweet.URLs, link)
tweet.Hashtags = append(tweet.Hashtags, h.Text()) }
}) })
s.Find("a.twitter-timeline-link:not(.u-hidden)").Each(func(i int, u *goquery.Selection) { s.Find(".AdaptiveMedia-photoContainer").Each(func(i int, p *goquery.Selection) {
if link, ok := u.Attr("data-expanded-url"); ok { if link, ok := p.Attr("data-image-url"); ok {
tweet.URLs = append(tweet.URLs, link) tweet.Photos = append(tweet.Photos, link)
} }
}) })
s.Find(".AdaptiveMedia-photoContainer").Each(func(i int, p *goquery.Selection) { s.Find(".PlayableMedia-player").Each(func(i int, v *goquery.Selection) {
if link, ok := p.Attr("data-image-url"); ok { if style, ok := v.Attr("style"); ok {
tweet.Photos = append(tweet.Photos, link) if strings.Contains(style, "background") {
} match := regexp.MustCompile(`https:\/\/.+\/([\w-]+)\.(?:jpg|png)`).FindStringSubmatch(style)
}) if len(match) == 2 {
s.Find(".PlayableMedia-player").Each(func(i int, v *goquery.Selection) { tweet.Videos = append(tweet.Videos, Video{ID: match[1], Preview: match[0]})
if style, ok := v.Attr("style"); ok {
if strings.Contains(style, "background") {
match := regexp.MustCompile(`https:\/\/.+\/([\w-]+)\.(?:jpg|png)`).FindStringSubmatch(style)
if len(match) == 2 {
tweet.Videos = append(tweet.Videos, Video{ID: match[1], Preview: match[0]})
}
} }
} }
}) }
tweets = append(tweets, &tweet) })
} tweets = append(tweets, &tweet)
}) }
} else if resp.StatusCode == http.StatusNotFound { })
return nil, fmt.Errorf("user %s not found", user)
} else {
return nil, fmt.Errorf("response status: %s", resp.Status)
}
return tweets, nil return tweets, nil
} }

33
util.go Normal file
View file

@ -0,0 +1,33 @@
package twitterscraper
import (
"encoding/json"
"fmt"
"net/http"
"strings"
)
// getHTMLFromJSON executes req with http.DefaultClient, decodes the response
// body as a JSON object and returns a reader over the string stored under
// field.
//
// An error is returned when:
//   - the request itself fails;
//   - the response status is anything other than 200 OK (the status line is
//     included in the error);
//   - the body cannot be decoded into a JSON object;
//   - field is absent from the object or its value is not a string.
func getHTMLFromJSON(req *http.Request, field string) (*strings.Reader, error) {
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("response status: %s", resp.Status)
	}

	ajaxJSON := make(map[string]interface{})
	if err := json.NewDecoder(resp.Body).Decode(&ajaxJSON); err != nil {
		return nil, err
	}

	// The type assertion fails both for a missing key (nil interface) and for
	// a value of another JSON type, so the message covers both cases and
	// names the field to make the failure diagnosable.
	htm, ok := ajaxJSON[field].(string)
	if !ok {
		return nil, fmt.Errorf("field %q not found in JSON or not a string", field)
	}
	return strings.NewReader(htm), nil
}