2020-05-13 17:35:44 +02:00
|
|
|
package twitterscraper
|
2020-05-14 14:59:33 +02:00
|
|
|
|
|
|
|
|
import (
|
2020-06-12 21:31:08 +08:00
|
|
|
"context"
|
2020-05-14 14:59:33 +02:00
|
|
|
"fmt"
|
|
|
|
|
"net/url"
|
|
|
|
|
"strconv"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// ajaxSearchURL is the fmt format string for the Twitter search timeline
// endpoint; its single %s verb receives the escaped search query.
const ajaxSearchURL = "https://twitter.com/i/search/timeline?q=%s"
|
|
|
|
|
|
2020-05-15 17:52:06 +02:00
|
|
|
// SearchTweets returns channel with tweets for a given search query
|
2020-06-12 21:31:08 +08:00
|
|
|
func SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *Result {
|
2020-05-14 14:59:33 +02:00
|
|
|
channel := make(chan *Result)
|
|
|
|
|
go func(query string) {
|
|
|
|
|
defer close(channel)
|
|
|
|
|
var maxId string
|
|
|
|
|
tweetsNbr := 0
|
|
|
|
|
for tweetsNbr < maxTweetsNbr {
|
2020-06-12 21:31:08 +08:00
|
|
|
select {
|
|
|
|
|
case <-ctx.Done():
|
|
|
|
|
channel <- &Result{Error: ctx.Err()}
|
|
|
|
|
return
|
|
|
|
|
default:
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-14 14:59:33 +02:00
|
|
|
tweets, err := FetchSearchTweets(query, maxId)
|
|
|
|
|
if err != nil {
|
|
|
|
|
channel <- &Result{Error: err}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if len(tweets) == 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for _, tweet := range tweets {
|
2020-06-12 21:31:08 +08:00
|
|
|
select {
|
|
|
|
|
case <-ctx.Done():
|
|
|
|
|
channel <- &Result{Error: ctx.Err()}
|
|
|
|
|
return
|
|
|
|
|
default:
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-14 14:59:33 +02:00
|
|
|
if tweetsNbr < maxTweetsNbr {
|
|
|
|
|
lastId, _ := strconv.ParseInt(tweet.ID, 10, 64)
|
2020-06-12 21:31:08 +08:00
|
|
|
maxId = strconv.FormatInt(lastId-1, 10)
|
2020-05-14 14:59:33 +02:00
|
|
|
channel <- &Result{Tweet: *tweet}
|
|
|
|
|
}
|
|
|
|
|
tweetsNbr++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}(query)
|
|
|
|
|
return channel
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-15 17:52:06 +02:00
|
|
|
// FetchSearchTweets gets tweets for a given search query, via the Twitter frontend API
|
2020-05-14 14:59:33 +02:00
|
|
|
func FetchSearchTweets(query, maxId string) ([]*Tweet, error) {
|
|
|
|
|
if maxId != "" {
|
|
|
|
|
query = query + " max_id:" + maxId
|
|
|
|
|
}
|
|
|
|
|
|
2020-06-15 15:26:43 +03:00
|
|
|
req, err := newRequest(fmt.Sprintf(ajaxSearchURL, url.PathEscape(query)))
|
2020-05-14 14:59:33 +02:00
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2020-06-15 15:26:43 +03:00
|
|
|
|
2020-05-14 14:59:33 +02:00
|
|
|
req.Header.Set("Referer", "https://twitter.com/search/timeline")
|
|
|
|
|
|
|
|
|
|
q := req.URL.Query()
|
|
|
|
|
q.Add("f", "tweets")
|
|
|
|
|
q.Add("include_available_features", "1")
|
|
|
|
|
q.Add("include_entities", "1")
|
|
|
|
|
q.Add("include_new_items_bar", "true")
|
|
|
|
|
|
|
|
|
|
req.URL.RawQuery = q.Encode()
|
|
|
|
|
|
|
|
|
|
htm, err := getHTMLFromJSON(req, "items_html")
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tweets, err := readTweetsFromHTML(htm)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return tweets, nil
|
|
|
|
|
}
|