Add Trends
This commit is contained in:
parent
75d9805984
commit
cea7f72d9d
5 changed files with 159 additions and 70 deletions
21
README.md
21
README.md
|
|
@ -51,8 +51,27 @@ func main() {
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Get trends
|
||||||
|
|
||||||
|
```golang
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
twitterscraper "github.com/n0madic/twitter-scraper"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
trends, err := twitterscraper.GetTrends()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
fmt.Println(trends)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
go get -u github.com/n0madic/twitter-scraper
|
go get -u github.com/n0madic/twitter-scraper
|
||||||
```
|
```
|
||||||
|
|
|
||||||
35
trends.go
Normal file
35
trends.go
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
package twitterscraper
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/PuerkitoBio/goquery"
|
||||||
|
)
|
||||||
|
|
||||||
|
const trendsURL = "https://twitter.com/i/trends"
|
||||||
|
|
||||||
|
// GetTrends return list of trends
|
||||||
|
func GetTrends() ([]string, error) {
|
||||||
|
req, err := http.NewRequest("GET", trendsURL, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
htm, err := getHTMLFromJSON(req, "module_html")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
doc, err := goquery.NewDocumentFromReader(htm)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var trends []string
|
||||||
|
doc.Find("li").Each(func(i int, s *goquery.Selection) {
|
||||||
|
if trend, ok := s.Attr("data-trend-name"); ok {
|
||||||
|
trends = append(trends, trend)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return trends, nil
|
||||||
|
}
|
||||||
16
trends_test.go
Normal file
16
trends_test.go
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
package twitterscraper
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGetTrends(t *testing.T) {
|
||||||
|
trends, err := GetTrends()
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(trends) != 10 {
|
||||||
|
t.Error("Expected 10 trends")
|
||||||
|
}
|
||||||
|
}
|
||||||
124
tweets.go
124
tweets.go
|
|
@ -1,7 +1,6 @@
|
||||||
package twitterscraper
|
package twitterscraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
|
@ -90,81 +89,68 @@ func FetchTweets(user string, last string) ([]*Tweet, error) {
|
||||||
}
|
}
|
||||||
req.URL.RawQuery = q.Encode()
|
req.URL.RawQuery = q.Encode()
|
||||||
|
|
||||||
resp, err := http.DefaultClient.Do(req)
|
htm, err := getHTMLFromJSON(req, "items_html")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
|
||||||
|
|
||||||
if resp.StatusCode == http.StatusOK {
|
doc, err := goquery.NewDocumentFromReader(htm)
|
||||||
ajaxJSON := make(map[string]interface{})
|
if err != nil {
|
||||||
err = json.NewDecoder(resp.Body).Decode(&ajaxJSON)
|
return nil, err
|
||||||
if err != nil {
|
}
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(ajaxJSON["items_html"].(string)))
|
doc.Find(".stream-item").Each(func(i int, s *goquery.Selection) {
|
||||||
if err != nil {
|
var tweet Tweet
|
||||||
return nil, err
|
timeStr, ok := s.Find("._timestamp").Attr("data-time")
|
||||||
}
|
if ok {
|
||||||
|
tweet.Timestamp, _ = strconv.ParseInt(timeStr, 10, 64)
|
||||||
doc.Find(".stream-item").Each(func(i int, s *goquery.Selection) {
|
tweet.TimeParsed = time.Unix(tweet.Timestamp, 0)
|
||||||
var tweet Tweet
|
tweet.ID = s.AttrOr("data-item-id", "")
|
||||||
timeStr, ok := s.Find("._timestamp").Attr("data-time")
|
tweet.PermanentURL = fmt.Sprintf("https://twitter.com/%s/status/%s", user, tweet.ID)
|
||||||
if ok {
|
tweet.Text = s.Find(".tweet-text").Text()
|
||||||
tweet.Timestamp, _ = strconv.ParseInt(timeStr, 10, 64)
|
tweet.HTML, _ = s.Find(".tweet-text").Html()
|
||||||
tweet.TimeParsed = time.Unix(tweet.Timestamp, 0)
|
s.Find(".js-retweet-text, .QuoteTweet").Each(func(i int, c *goquery.Selection) {
|
||||||
tweet.ID = s.AttrOr("data-item-id", "")
|
tweet.IsRetweet = true
|
||||||
tweet.PermanentURL = fmt.Sprintf("https://twitter.com/%s/status/%s", user, tweet.ID)
|
})
|
||||||
tweet.Text = s.Find(".tweet-text").Text()
|
s.Find(".ProfileTweet-actionCount").Each(func(i int, c *goquery.Selection) {
|
||||||
tweet.HTML, _ = s.Find(".tweet-text").Html()
|
txt := strings.TrimSpace(c.Text())
|
||||||
s.Find(".js-retweet-text, .QuoteTweet").Each(func(i int, c *goquery.Selection) {
|
if strings.HasSuffix(txt, "likes") {
|
||||||
tweet.IsRetweet = true
|
l := strings.Split(txt, " ")
|
||||||
})
|
tweet.Likes, _ = strconv.Atoi(l[0])
|
||||||
s.Find(".ProfileTweet-actionCount").Each(func(i int, c *goquery.Selection) {
|
} else if strings.HasSuffix(txt, "replies") {
|
||||||
txt := strings.TrimSpace(c.Text())
|
l := strings.Split(txt, " ")
|
||||||
if strings.HasSuffix(txt, "likes") {
|
tweet.Replies, _ = strconv.Atoi(l[0])
|
||||||
l := strings.Split(txt, " ")
|
} else if strings.HasSuffix(txt, "retweets") {
|
||||||
tweet.Likes, _ = strconv.Atoi(l[0])
|
l := strings.Split(txt, " ")
|
||||||
} else if strings.HasSuffix(txt, "replies") {
|
tweet.Retweets, _ = strconv.Atoi(l[0])
|
||||||
l := strings.Split(txt, " ")
|
}
|
||||||
tweet.Replies, _ = strconv.Atoi(l[0])
|
})
|
||||||
} else if strings.HasSuffix(txt, "retweets") {
|
s.Find(".twitter-hashtag").Each(func(i int, h *goquery.Selection) {
|
||||||
l := strings.Split(txt, " ")
|
tweet.Hashtags = append(tweet.Hashtags, h.Text())
|
||||||
tweet.Retweets, _ = strconv.Atoi(l[0])
|
})
|
||||||
}
|
s.Find("a.twitter-timeline-link:not(.u-hidden)").Each(func(i int, u *goquery.Selection) {
|
||||||
})
|
if link, ok := u.Attr("data-expanded-url"); ok {
|
||||||
s.Find(".twitter-hashtag").Each(func(i int, h *goquery.Selection) {
|
tweet.URLs = append(tweet.URLs, link)
|
||||||
tweet.Hashtags = append(tweet.Hashtags, h.Text())
|
}
|
||||||
})
|
})
|
||||||
s.Find("a.twitter-timeline-link:not(.u-hidden)").Each(func(i int, u *goquery.Selection) {
|
s.Find(".AdaptiveMedia-photoContainer").Each(func(i int, p *goquery.Selection) {
|
||||||
if link, ok := u.Attr("data-expanded-url"); ok {
|
if link, ok := p.Attr("data-image-url"); ok {
|
||||||
tweet.URLs = append(tweet.URLs, link)
|
tweet.Photos = append(tweet.Photos, link)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
s.Find(".AdaptiveMedia-photoContainer").Each(func(i int, p *goquery.Selection) {
|
s.Find(".PlayableMedia-player").Each(func(i int, v *goquery.Selection) {
|
||||||
if link, ok := p.Attr("data-image-url"); ok {
|
if style, ok := v.Attr("style"); ok {
|
||||||
tweet.Photos = append(tweet.Photos, link)
|
if strings.Contains(style, "background") {
|
||||||
}
|
match := regexp.MustCompile(`https:\/\/.+\/([\w-]+)\.(?:jpg|png)`).FindStringSubmatch(style)
|
||||||
})
|
if len(match) == 2 {
|
||||||
s.Find(".PlayableMedia-player").Each(func(i int, v *goquery.Selection) {
|
tweet.Videos = append(tweet.Videos, Video{ID: match[1], Preview: match[0]})
|
||||||
if style, ok := v.Attr("style"); ok {
|
|
||||||
if strings.Contains(style, "background") {
|
|
||||||
match := regexp.MustCompile(`https:\/\/.+\/([\w-]+)\.(?:jpg|png)`).FindStringSubmatch(style)
|
|
||||||
if len(match) == 2 {
|
|
||||||
tweet.Videos = append(tweet.Videos, Video{ID: match[1], Preview: match[0]})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
}
|
||||||
tweets = append(tweets, &tweet)
|
})
|
||||||
}
|
tweets = append(tweets, &tweet)
|
||||||
})
|
}
|
||||||
} else if resp.StatusCode == http.StatusNotFound {
|
})
|
||||||
return nil, fmt.Errorf("user %s not found", user)
|
|
||||||
} else {
|
|
||||||
return nil, fmt.Errorf("response status: %s", resp.Status)
|
|
||||||
}
|
|
||||||
|
|
||||||
return tweets, nil
|
return tweets, nil
|
||||||
}
|
}
|
||||||
|
|
|
||||||
33
util.go
Normal file
33
util.go
Normal file
|
|
@ -0,0 +1,33 @@
|
||||||
|
package twitterscraper
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// getHTMLFromJSON executes req with http.DefaultClient, decodes the response
// body as a JSON object and returns a reader over the string stored under
// field.
//
// An error is returned when the request fails, when the response status is
// not 200 OK, when the body cannot be decoded into a JSON object, or when
// field is absent or does not hold a string.
func getHTMLFromJSON(req *http.Request, field string) (*strings.Reader, error) {
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("response status: %s", resp.Status)
	}

	ajaxJSON := make(map[string]interface{})
	if err := json.NewDecoder(resp.Body).Decode(&ajaxJSON); err != nil {
		return nil, err
	}

	// The type assertion fails both for a missing key and for a non-string
	// value, so one check covers both cases.
	htm, ok := ajaxJSON[field].(string)
	if !ok {
		return nil, fmt.Errorf("field %q not found in JSON", field)
	}

	return strings.NewReader(htm), nil
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue