diff --git a/README.md b/README.md
index c528391..34d0f70 100644
--- a/README.md
+++ b/README.md
@@ -51,8 +51,27 @@ func main() {
 }
 ```
 
+### Get trends
+
+```golang
+package main
+
+import (
+	"fmt"
+	twitterscraper "github.com/n0madic/twitter-scraper"
+)
+
+func main() {
+	trends, err := twitterscraper.GetTrends()
+	if err != nil {
+		panic(err)
+	}
+	fmt.Println(trends)
+}
+```
+
 ## Installation
 
 ```shell
 go get -u github.com/n0madic/twitter-scraper
-```
\ No newline at end of file
+```
diff --git a/trends.go b/trends.go
new file mode 100644
index 0000000..b630b2c
--- /dev/null
+++ b/trends.go
@@ -0,0 +1,38 @@
+package twitterscraper
+
+import (
+	"net/http"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+// trendsURL is the endpoint GetTrends requests.
+const trendsURL = "https://twitter.com/i/trends"
+
+// GetTrends returns the trend names listed at trendsURL.
+// It reads the HTML held in the "module_html" field of the JSON response and
+// collects the data-trend-name attribute of every <li> element that has one.
+func GetTrends() ([]string, error) {
+	req, err := http.NewRequest("GET", trendsURL, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	htm, err := getHTMLFromJSON(req, "module_html")
+	if err != nil {
+		return nil, err
+	}
+
+	doc, err := goquery.NewDocumentFromReader(htm)
+	if err != nil {
+		return nil, err
+	}
+
+	var trends []string
+	doc.Find("li").Each(func(i int, s *goquery.Selection) {
+		if trend, ok := s.Attr("data-trend-name"); ok {
+			trends = append(trends, trend)
+		}
+	})
+	return trends, nil
+}
diff --git a/trends_test.go b/trends_test.go
new file mode 100644
index 0000000..9f61445
--- /dev/null
+++ b/trends_test.go
@@ -0,0 +1,17 @@
+package twitterscraper
+
+import (
+	"testing"
+)
+
+// TestGetTrends calls GetTrends (a live HTTP request) and expects exactly 10 trends.
+func TestGetTrends(t *testing.T) {
+	trends, err := GetTrends()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(trends) != 10 {
+		t.Errorf("expected 10 trends, got %d", len(trends))
+	}
+}
diff --git a/tweets.go b/tweets.go
index 78b54a8..c50705e 100644
--- a/tweets.go
+++ b/tweets.go
@@ -1,7 +1,6 @@
 package twitterscraper
 
 import (
-	"encoding/json"
 	"fmt"
 	"net/http"
 	"regexp"
@@ -90,81 +89,68 @@ func FetchTweets(user string, last string) ([]*Tweet, error) {
 	}
 	req.URL.RawQuery = q.Encode()
 
-	resp, err := http.DefaultClient.Do(req)
+	htm, err := getHTMLFromJSON(req, "items_html")
 	if err != nil {
 		return nil, err
 	}
-	defer resp.Body.Close()
 
-	if resp.StatusCode == http.StatusOK {
-		ajaxJSON := make(map[string]interface{})
-		err = json.NewDecoder(resp.Body).Decode(&ajaxJSON)
-		if err != nil {
-			return nil, err
-		}
+	doc, err := goquery.NewDocumentFromReader(htm)
+	if err != nil {
+		return nil, err
+	}
 
-		doc, err := goquery.NewDocumentFromReader(strings.NewReader(ajaxJSON["items_html"].(string)))
-		if err != nil {
-			return nil, err
-		}
-
-		doc.Find(".stream-item").Each(func(i int, s *goquery.Selection) {
-			var tweet Tweet
-			timeStr, ok := s.Find("._timestamp").Attr("data-time")
-			if ok {
-				tweet.Timestamp, _ = strconv.ParseInt(timeStr, 10, 64)
-				tweet.TimeParsed = time.Unix(tweet.Timestamp, 0)
-				tweet.ID = s.AttrOr("data-item-id", "")
-				tweet.PermanentURL = fmt.Sprintf("https://twitter.com/%s/status/%s", user, tweet.ID)
-				tweet.Text = s.Find(".tweet-text").Text()
-				tweet.HTML, _ = s.Find(".tweet-text").Html()
-				s.Find(".js-retweet-text, .QuoteTweet").Each(func(i int, c *goquery.Selection) {
-					tweet.IsRetweet = true
-				})
-				s.Find(".ProfileTweet-actionCount").Each(func(i int, c *goquery.Selection) {
-					txt := strings.TrimSpace(c.Text())
-					if strings.HasSuffix(txt, "likes") {
-						l := strings.Split(txt, " ")
-						tweet.Likes, _ = strconv.Atoi(l[0])
-					} else if strings.HasSuffix(txt, "replies") {
-						l := strings.Split(txt, " ")
-						tweet.Replies, _ = strconv.Atoi(l[0])
-					} else if strings.HasSuffix(txt, "retweets") {
-						l := strings.Split(txt, " ")
-						tweet.Retweets, _ = strconv.Atoi(l[0])
-					}
-				})
-				s.Find(".twitter-hashtag").Each(func(i int, h *goquery.Selection) {
-					tweet.Hashtags = append(tweet.Hashtags, h.Text())
-				})
-				s.Find("a.twitter-timeline-link:not(.u-hidden)").Each(func(i int, u *goquery.Selection) {
-					if link, ok := u.Attr("data-expanded-url"); ok {
-						tweet.URLs = append(tweet.URLs, link)
-					}
-				})
-				s.Find(".AdaptiveMedia-photoContainer").Each(func(i int, p *goquery.Selection) {
-					if link, ok := p.Attr("data-image-url"); ok {
-						tweet.Photos = append(tweet.Photos, link)
-					}
-				})
-				s.Find(".PlayableMedia-player").Each(func(i int, v *goquery.Selection) {
-					if style, ok := v.Attr("style"); ok {
-						if strings.Contains(style, "background") {
-							match := regexp.MustCompile(`https:\/\/.+\/([\w-]+)\.(?:jpg|png)`).FindStringSubmatch(style)
-							if len(match) == 2 {
-								tweet.Videos = append(tweet.Videos, Video{ID: match[1], Preview: match[0]})
-							}
+	doc.Find(".stream-item").Each(func(i int, s *goquery.Selection) {
+		var tweet Tweet
+		timeStr, ok := s.Find("._timestamp").Attr("data-time")
+		if ok {
+			tweet.Timestamp, _ = strconv.ParseInt(timeStr, 10, 64)
+			tweet.TimeParsed = time.Unix(tweet.Timestamp, 0)
+			tweet.ID = s.AttrOr("data-item-id", "")
+			tweet.PermanentURL = fmt.Sprintf("https://twitter.com/%s/status/%s", user, tweet.ID)
+			tweet.Text = s.Find(".tweet-text").Text()
+			tweet.HTML, _ = s.Find(".tweet-text").Html()
+			s.Find(".js-retweet-text, .QuoteTweet").Each(func(i int, c *goquery.Selection) {
+				tweet.IsRetweet = true
+			})
+			s.Find(".ProfileTweet-actionCount").Each(func(i int, c *goquery.Selection) {
+				txt := strings.TrimSpace(c.Text())
+				if strings.HasSuffix(txt, "likes") {
+					l := strings.Split(txt, " ")
+					tweet.Likes, _ = strconv.Atoi(l[0])
+				} else if strings.HasSuffix(txt, "replies") {
+					l := strings.Split(txt, " ")
+					tweet.Replies, _ = strconv.Atoi(l[0])
+				} else if strings.HasSuffix(txt, "retweets") {
+					l := strings.Split(txt, " ")
+					tweet.Retweets, _ = strconv.Atoi(l[0])
+				}
+			})
+			s.Find(".twitter-hashtag").Each(func(i int, h *goquery.Selection) {
+				tweet.Hashtags = append(tweet.Hashtags, h.Text())
+			})
+			s.Find("a.twitter-timeline-link:not(.u-hidden)").Each(func(i int, u *goquery.Selection) {
+				if link, ok := u.Attr("data-expanded-url"); ok {
+					tweet.URLs = append(tweet.URLs, link)
+				}
+			})
+			s.Find(".AdaptiveMedia-photoContainer").Each(func(i int, p *goquery.Selection) {
+				if link, ok := p.Attr("data-image-url"); ok {
+					tweet.Photos = append(tweet.Photos, link)
+				}
+			})
+			s.Find(".PlayableMedia-player").Each(func(i int, v *goquery.Selection) {
+				if style, ok := v.Attr("style"); ok {
+					if strings.Contains(style, "background") {
+						match := regexp.MustCompile(`https:\/\/.+\/([\w-]+)\.(?:jpg|png)`).FindStringSubmatch(style)
+						if len(match) == 2 {
+							tweet.Videos = append(tweet.Videos, Video{ID: match[1], Preview: match[0]})
 						}
 					}
-				})
-				tweets = append(tweets, &tweet)
-			}
-		})
-	} else if resp.StatusCode == http.StatusNotFound {
-		return nil, fmt.Errorf("user %s not found", user)
-	} else {
-		return nil, fmt.Errorf("response status: %s", resp.Status)
-	}
+				}
+			})
+			tweets = append(tweets, &tweet)
+		}
+	})
 
 	return tweets, nil
 }
diff --git a/util.go b/util.go
new file mode 100644
index 0000000..70d06a3
--- /dev/null
+++ b/util.go
@@ -0,0 +1,37 @@
+package twitterscraper
+
+import (
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"strings"
+)
+
+// getHTMLFromJSON performs req with http.DefaultClient, decodes the JSON
+// response body, and returns a reader over the string stored under field.
+// It returns an error if the request fails, the status is not 200 OK, the
+// body is not valid JSON, or field is missing or not a string.
+func getHTMLFromJSON(req *http.Request, field string) (*strings.Reader, error) {
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("response status: %s", resp.Status)
+	}
+
+	ajaxJSON := make(map[string]interface{})
+	err = json.NewDecoder(resp.Body).Decode(&ajaxJSON)
+	if err != nil {
+		return nil, err
+	}
+
+	htm, ok := ajaxJSON[field].(string)
+	if !ok {
+		return nil, fmt.Errorf("field %q not found in JSON", field)
+	}
+
+	return strings.NewReader(htm), nil
+}