Merge pull request #6 from xiscocapllonch/master
New feature for search queries
This commit is contained in:
commit
bf3225765d
5 changed files with 161 additions and 2 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -1,2 +1,3 @@
|
||||||
*.htm?
|
*.htm?
|
||||||
*.json
|
*.json
|
||||||
|
.idea/
|
||||||
|
|
|
||||||
29
README.md
29
README.md
|
|
@ -10,7 +10,7 @@ You can use this library to get the text of any user's Tweets trivially.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
### Get tweets
|
### Get user tweets
|
||||||
|
|
||||||
```golang
|
```golang
|
||||||
package main
|
package main
|
||||||
|
|
@ -32,6 +32,33 @@ func main() {
|
||||||
|
|
||||||
It appears you can ask for up to 25 pages of tweets reliably (~486 tweets).
|
It appears you can ask for up to 25 pages of tweets reliably (~486 tweets).
|
||||||
|
|
||||||
|
### Search tweets using standard query operators
|
||||||
|
|
||||||
|
Tweets containing “twitter” and “scraper” and “data”, filtering out retweets:
|
||||||
|
|
||||||
|
```golang
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
twitterscraper "github.com/n0madic/twitter-scraper"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
for tweet := range twitterscraper.SearchTweets("twitter scraper data -filter:retweets", 50) {
|
||||||
|
if tweet.Error != nil {
|
||||||
|
panic(tweet.Error)
|
||||||
|
}
|
||||||
|
fmt.Println(tweet.HTML)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The search ends once 50 tweets have been returned (or earlier, if there are no more results).
|
||||||
|
|
||||||
|
See <https://developer.twitter.com/en/docs/tweets/rules-and-filtering/overview/standard-operators> to learn how to build standard queries.
|
||||||
|
|
||||||
|
|
||||||
### Get profile
|
### Get profile
|
||||||
|
|
||||||
```golang
|
```golang
|
||||||
|
|
|
||||||
83
search.go
Normal file
83
search.go
Normal file
|
|
@ -0,0 +1,83 @@
|
||||||
|
package twitterscraper
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strconv"
|
||||||
|
)
|
||||||
|
|
||||||
|
const ajaxSearchURL = "https://twitter.com/i/search/timeline?q=%s"
|
||||||
|
|
||||||
|
// SearchTweets returns channel with tweets for a given search query
|
||||||
|
func SearchTweets(query string, maxTweetsNbr int) <-chan *Result {
|
||||||
|
channel := make(chan *Result)
|
||||||
|
go func(query string) {
|
||||||
|
defer close(channel)
|
||||||
|
var maxId string
|
||||||
|
tweetsNbr := 0
|
||||||
|
for tweetsNbr < maxTweetsNbr {
|
||||||
|
tweets, err := FetchSearchTweets(query, maxId)
|
||||||
|
if err != nil {
|
||||||
|
channel <- &Result{Error: err}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(tweets) == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tweet := range tweets {
|
||||||
|
if tweetsNbr < maxTweetsNbr {
|
||||||
|
lastId, _ := strconv.ParseInt(tweet.ID, 10, 64)
|
||||||
|
maxId = strconv.FormatInt(lastId - 1, 10)
|
||||||
|
channel <- &Result{Tweet: *tweet}
|
||||||
|
}
|
||||||
|
tweetsNbr++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}(query)
|
||||||
|
return channel
|
||||||
|
}
|
||||||
|
|
||||||
|
// FetchSearchTweets gets tweets for a given search query, via the Twitter frontend API
|
||||||
|
func FetchSearchTweets(query, maxId string) ([]*Tweet, error) {
|
||||||
|
if maxId != "" {
|
||||||
|
query = query + " max_id:" + maxId
|
||||||
|
}
|
||||||
|
|
||||||
|
req, err := http.NewRequest(
|
||||||
|
"GET",
|
||||||
|
fmt.Sprintf(ajaxSearchURL, url.PathEscape(query)),
|
||||||
|
nil,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
req.Header.Set("Referer", "https://twitter.com/search/timeline")
|
||||||
|
req.Header.Set("Accept", "application/json, text/javascript, */*; q=0.01")
|
||||||
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8")
|
||||||
|
req.Header.Set("Accept-Language", "en-US")
|
||||||
|
req.Header.Set("X-Twitter-Active-User", "yes")
|
||||||
|
req.Header.Set("X-Requested-With", "XMLHttpRequest")
|
||||||
|
|
||||||
|
q := req.URL.Query()
|
||||||
|
q.Add("f", "tweets")
|
||||||
|
q.Add("include_available_features", "1")
|
||||||
|
q.Add("include_entities", "1")
|
||||||
|
q.Add("include_new_items_bar", "true")
|
||||||
|
|
||||||
|
req.URL.RawQuery = q.Encode()
|
||||||
|
|
||||||
|
htm, err := getHTMLFromJSON(req, "items_html")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
tweets, err := readTweetsFromHTML(htm)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return tweets, nil
|
||||||
|
}
|
||||||
37
search_test.go
Normal file
37
search_test.go
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
package twitterscraper
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestGetSearchTweets(t *testing.T) {
|
||||||
|
count := 0
|
||||||
|
maxTweetsNbr := 50
|
||||||
|
for tweet := range SearchTweets("twitter scraper data -filter:retweets", maxTweetsNbr) {
|
||||||
|
if tweet.Error != nil {
|
||||||
|
t.Error(tweet.Error)
|
||||||
|
} else {
|
||||||
|
count++
|
||||||
|
if tweet.HTML == "" {
|
||||||
|
t.Error("Expected tweet HTML is not empty")
|
||||||
|
}
|
||||||
|
if tweet.ID == "" {
|
||||||
|
t.Error("Expected tweet ID is not empty")
|
||||||
|
}
|
||||||
|
if tweet.PermanentURL == "" {
|
||||||
|
t.Error("Expected tweet PermanentURL is not empty")
|
||||||
|
}
|
||||||
|
if tweet.Text == "" {
|
||||||
|
t.Error("Expected tweet Text is not empty")
|
||||||
|
}
|
||||||
|
if tweet.TimeParsed.IsZero() {
|
||||||
|
t.Error("Expected tweet TimeParsed is not zero")
|
||||||
|
}
|
||||||
|
if tweet.Timestamp == 0 {
|
||||||
|
t.Error("Expected tweet Timestamp is greater than zero")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if count != maxTweetsNbr {
|
||||||
|
t.Errorf("Expected tweets count=%v, got: %v", maxTweetsNbr, count)
|
||||||
|
}
|
||||||
|
}
|
||||||
13
tweets.go
13
tweets.go
|
|
@ -68,7 +68,6 @@ func GetTweets(user string, pages int) <-chan *Result {
|
||||||
|
|
||||||
// FetchTweets gets tweets for a given user, via the Twitter frontend API.
|
// FetchTweets gets tweets for a given user, via the Twitter frontend API.
|
||||||
func FetchTweets(user string, last string) ([]*Tweet, error) {
|
func FetchTweets(user string, last string) ([]*Tweet, error) {
|
||||||
var tweets []*Tweet
|
|
||||||
|
|
||||||
req, err := http.NewRequest("GET", fmt.Sprintf(ajaxURL, user), nil)
|
req, err := http.NewRequest("GET", fmt.Sprintf(ajaxURL, user), nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
@ -96,6 +95,17 @@ func FetchTweets(user string, last string) ([]*Tweet, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tweets, err := readTweetsFromHTML(htm)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return tweets, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func readTweetsFromHTML (htm *strings.Reader) ([]*Tweet, error) {
|
||||||
|
var tweets []*Tweet
|
||||||
|
|
||||||
doc, err := goquery.NewDocumentFromReader(htm)
|
doc, err := goquery.NewDocumentFromReader(htm)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|
@ -108,6 +118,7 @@ func FetchTweets(user string, last string) ([]*Tweet, error) {
|
||||||
tweet.Timestamp, _ = strconv.ParseInt(timeStr, 10, 64)
|
tweet.Timestamp, _ = strconv.ParseInt(timeStr, 10, 64)
|
||||||
tweet.TimeParsed = time.Unix(tweet.Timestamp, 0)
|
tweet.TimeParsed = time.Unix(tweet.Timestamp, 0)
|
||||||
tweet.ID = s.AttrOr("data-item-id", "")
|
tweet.ID = s.AttrOr("data-item-id", "")
|
||||||
|
user, _ := s.Find(".tweet").Attr("data-screen-name")
|
||||||
tweet.PermanentURL = fmt.Sprintf("https://twitter.com/%s/status/%s", user, tweet.ID)
|
tweet.PermanentURL = fmt.Sprintf("https://twitter.com/%s/status/%s", user, tweet.ID)
|
||||||
tweet.Text = s.Find(".tweet-text").Text()
|
tweet.Text = s.Find(".tweet-text").Text()
|
||||||
tweet.HTML, _ = s.Find(".tweet-text").Html()
|
tweet.HTML, _ = s.Find(".tweet-text").Html()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue