Add Scraper object
This commit is contained in:
parent
edad8f6393
commit
6bf65cd482
9 changed files with 158 additions and 80 deletions
41
README.md
41
README.md
|
|
@ -26,7 +26,8 @@ import (
|
|||
)
|
||||
|
||||
func main() {
|
||||
for tweet := range twitterscraper.GetTweets(context.Background(), "Twitter", 50) {
|
||||
scraper := twitterscraper.New()
|
||||
for tweet := range scraper.GetTweets(context.Background(), "Twitter", 50) {
|
||||
if tweet.Error != nil {
|
||||
panic(tweet.Error)
|
||||
}
|
||||
|
|
@ -51,7 +52,8 @@ import (
|
|||
)
|
||||
|
||||
func main() {
|
||||
for tweet := range twitterscraper.SearchTweets(context.Background(),
|
||||
scraper := twitterscraper.New()
|
||||
for tweet := range scraper.SearchTweets(context.Background(),
|
||||
"twitter scraper data -filter:retweets", 50) {
|
||||
if tweet.Error != nil {
|
||||
panic(tweet.Error)
|
||||
|
|
@ -76,7 +78,8 @@ import (
|
|||
)
|
||||
|
||||
func main() {
|
||||
profile, err := twitterscraper.GetProfile("Twitter")
|
||||
scraper := twitterscraper.New()
|
||||
profile, err := scraper.GetProfile("Twitter")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
|
@ -95,7 +98,8 @@ import (
|
|||
)
|
||||
|
||||
func main() {
|
||||
trends, err := twitterscraper.GetTrends()
|
||||
scraper := twitterscraper.New()
|
||||
trends, err := scraper.GetTrends()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
|
@ -106,11 +110,36 @@ func main() {
|
|||
### Use http proxy
|
||||
|
||||
```golang
|
||||
twitterscraper.SetProxy("http://localhost:3128")
|
||||
err := scraper.SetProxy("http://localhost:3128")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
```
|
||||
|
||||
### Load timeline with tweet replies
|
||||
|
||||
```golang
|
||||
twitterscraper.IncludeReplies = true
|
||||
scraper.WithReplies(true)
|
||||
```
|
||||
|
||||
### Default Scraper (Ad hoc)
|
||||
|
||||
In simple cases, you can use the default scraper without creating an object instance
|
||||
|
||||
```golang
|
||||
import twitterscraper "github.com/n0madic/twitter-scraper"
|
||||
|
||||
// for tweets
|
||||
twitterscraper.GetTweets(context.Background(), "Twitter", 50)
|
||||
// for tweets with replies (WithReplies changes the shared default scraper, so the setting persists for later calls)
|
||||
twitterscraper.WithReplies(true).GetTweets(context.Background(), "Twitter", 50)
|
||||
|
||||
// for search
|
||||
twitterscraper.SearchTweets(context.Background(), "twitter", 50)
|
||||
|
||||
// for profile
|
||||
twitterscraper.GetProfile("Twitter")
|
||||
|
||||
// for trends
|
||||
twitterscraper.GetTrends()
|
||||
```
|
||||
29
api.go
29
api.go
|
|
@ -18,23 +18,22 @@ type user struct {
|
|||
} `json:"data"`
|
||||
}
|
||||
|
||||
var (
|
||||
guestToken string
|
||||
cacheIDs sync.Map
|
||||
)
|
||||
// Global cache for user IDs
|
||||
var cacheIDs sync.Map
|
||||
|
||||
func requestAPI(req *http.Request, target interface{}) error {
|
||||
if guestToken == "" {
|
||||
err := GetGuestToken()
|
||||
// RequestAPI sends req to the frontend API and decodes the JSON response into target.
|
||||
func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
|
||||
if s.guestToken == "" {
|
||||
err := s.GetGuestToken()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
req.Header.Set("Authorization", "Bearer "+bearerToken)
|
||||
req.Header.Set("X-Guest-Token", guestToken)
|
||||
req.Header.Set("X-Guest-Token", s.guestToken)
|
||||
|
||||
resp, err := newHTTPClient().Do(req)
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
@ -43,15 +42,15 @@ func requestAPI(req *http.Request, target interface{}) error {
|
|||
return json.NewDecoder(resp.Body).Decode(target)
|
||||
}
|
||||
|
||||
// GetGuestToken from API
|
||||
func GetGuestToken() error {
|
||||
// GetGuestToken requests a guest token from the Twitter API and stores it on the Scraper.
|
||||
func (s *Scraper) GetGuestToken() error {
|
||||
req, err := http.NewRequest("POST", "https://api.twitter.com/1.1/guest/activate.json", nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
req.Header.Set("Authorization", "Bearer "+bearerToken)
|
||||
|
||||
resp, err := newHTTPClient().Do(req)
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
@ -70,7 +69,7 @@ func GetGuestToken() error {
|
|||
return err
|
||||
}
|
||||
var ok bool
|
||||
if guestToken, ok = jsn["guest_token"].(string); !ok {
|
||||
if s.guestToken, ok = jsn["guest_token"].(string); !ok {
|
||||
return fmt.Errorf("guest_token not found")
|
||||
}
|
||||
|
||||
|
|
@ -78,7 +77,7 @@ func GetGuestToken() error {
|
|||
}
|
||||
|
||||
// GetUserIDByScreenName returns the user ID for screenName, checking the global ID cache first.
|
||||
func GetUserIDByScreenName(screenName string) (string, error) {
|
||||
func (s *Scraper) GetUserIDByScreenName(screenName string) (string, error) {
|
||||
id, ok := cacheIDs.Load(screenName)
|
||||
if ok {
|
||||
return id.(string), nil
|
||||
|
|
@ -90,7 +89,7 @@ func GetUserIDByScreenName(screenName string) (string, error) {
|
|||
return "", err
|
||||
}
|
||||
|
||||
err = requestAPI(req, &jsn)
|
||||
err = s.RequestAPI(req, &jsn)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,16 +5,18 @@ import (
|
|||
)
|
||||
|
||||
func TestGetGuestToken(t *testing.T) {
|
||||
if err := GetGuestToken(); err != nil {
|
||||
scraper := New()
|
||||
if err := scraper.GetGuestToken(); err != nil {
|
||||
t.Errorf("getGuestToken() error = %v", err)
|
||||
}
|
||||
if guestToken == "" {
|
||||
if scraper.guestToken == "" {
|
||||
t.Error("Expected non-empty guestToken")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetUserIDByScreenName(t *testing.T) {
|
||||
userID, err := GetUserIDByScreenName("Twitter")
|
||||
scraper := New()
|
||||
userID, err := scraper.GetUserIDByScreenName("Twitter")
|
||||
if err != nil {
|
||||
t.Errorf("getUserByScreenName() error = %v", err)
|
||||
}
|
||||
|
|
|
|||
13
profile.go
13
profile.go
|
|
@ -30,13 +30,13 @@ type Profile struct {
|
|||
}
|
||||
|
||||
// GetProfile returns the parsed user profile.
|
||||
func GetProfile(username string) (Profile, error) {
|
||||
userID, err := GetUserIDByScreenName(username)
|
||||
func (s *Scraper) GetProfile(username string) (Profile, error) {
|
||||
userID, err := s.GetUserIDByScreenName(username)
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
|
||||
req, err := newRequest("GET", "https://twitter.com/i/api/2/timeline/profile/"+userID+".json")
|
||||
req, err := s.newRequest("GET", "https://twitter.com/i/api/2/timeline/profile/"+userID+".json")
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
|
|
@ -47,7 +47,7 @@ func GetProfile(username string) (Profile, error) {
|
|||
req.URL.RawQuery = q.Encode()
|
||||
|
||||
var timeline timeline
|
||||
err = requestAPI(req, &timeline)
|
||||
err = s.RequestAPI(req, &timeline)
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
|
|
@ -89,3 +89,8 @@ func GetProfile(username string) (Profile, error) {
|
|||
|
||||
return profile, nil
|
||||
}
|
||||
|
||||
// GetProfile is a wrapper that calls GetProfile on the default Scraper.
|
||||
func GetProfile(username string) (Profile, error) {
|
||||
return defaultScraper.GetProfile(username)
|
||||
}
|
||||
|
|
|
|||
66
scraper.go
Normal file
66
scraper.go
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
package twitterscraper
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Scraper holds the HTTP client and the per-instance state used when talking
// to the Twitter frontend API. Create one with New; the zero value has no
// HTTP client.
type Scraper struct {
	// client performs every HTTP request issued by this scraper.
	client *http.Client
	// guestToken is sent in the X-Guest-Token header; it is fetched on the
	// first API request when still empty.
	guestToken string
	// includeReplies is sent as the include_tweet_replies query parameter.
	includeReplies bool
}

// defaultScraper backs the package-level wrapper functions.
var defaultScraper Scraper

// New creates a Scraper whose HTTP client uses a 10-second request timeout.
func New() Scraper {
	return Scraper{
		client: &http.Client{Timeout: 10 * time.Second},
	}
}

// WithReplies enables or disables loading timelines with tweet replies.
// It returns the receiver so that calls can be chained.
func (s *Scraper) WithReplies(b bool) *Scraper {
	s.includeReplies = b
	return s
}

// WithReplies is a wrapper that calls WithReplies on the default Scraper.
// The setting persists on the default Scraper for subsequent calls.
func WithReplies(b bool) *Scraper {
	return defaultScraper.WithReplies(b)
}

// SetProxy routes all requests of this scraper through an HTTP proxy given
// in the format `http://HOST:PORT`.
//
// It returns an error, leaving the current client untouched, when proxy does
// not start with "http://" or cannot be parsed as a URL.
func (s *Scraper) SetProxy(proxy string) error {
	if !strings.HasPrefix(proxy, "http://") {
		return errors.New("only support http protocol")
	}
	urlproxy, err := url.Parse(proxy)
	if err != nil {
		return err
	}
	s.client = &http.Client{
		// Keep the same overall request timeout that New configures. The
		// dialer timeout below only bounds connection setup, so without
		// this a stalled proxied response could block forever.
		Timeout: 10 * time.Second,
		Transport: &http.Transport{
			Proxy: http.ProxyURL(urlproxy),
			DialContext: (&net.Dialer{
				Timeout: 10 * time.Second,
			}).DialContext,
		},
	}
	return nil
}

// SetProxy is a wrapper that calls SetProxy on the default Scraper.
func SetProxy(proxy string) error {
	return defaultScraper.SetProxy(proxy)
}

// init populates the default Scraper so the package-level wrappers are usable
// without any setup.
func init() {
	defaultScraper = New()
}
|
||||
13
search.go
13
search.go
|
|
@ -7,18 +7,23 @@ import (
|
|||
)
|
||||
|
||||
// SearchTweets returns channel with tweets for a given search query
|
||||
func (s *Scraper) SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *Result {
|
||||
return getTimeline(ctx, query, maxTweetsNbr, s.FetchSearchTweets)
|
||||
}
|
||||
|
||||
// SearchTweets is a wrapper that calls SearchTweets on the default Scraper.
|
||||
func SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *Result {
|
||||
return getTimeline(ctx, query, maxTweetsNbr, FetchSearchTweets)
|
||||
return defaultScraper.SearchTweets(ctx, query, maxTweetsNbr)
|
||||
}
|
||||
|
||||
// FetchSearchTweets gets tweets for a given search query, via the Twitter frontend API
|
||||
func FetchSearchTweets(query string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) {
|
||||
func (s *Scraper) FetchSearchTweets(query string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) {
|
||||
query = url.PathEscape(query)
|
||||
if maxTweetsNbr > 200 {
|
||||
maxTweetsNbr = 200
|
||||
}
|
||||
|
||||
req, err := newRequest("GET", "https://twitter.com/i/api/2/search/adaptive.json")
|
||||
req, err := s.newRequest("GET", "https://twitter.com/i/api/2/search/adaptive.json")
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
|
@ -35,7 +40,7 @@ func FetchSearchTweets(query string, maxTweetsNbr int, cursor string) ([]*Tweet,
|
|||
req.URL.RawQuery = q.Encode()
|
||||
|
||||
var timeline timeline
|
||||
err = requestAPI(req, &timeline)
|
||||
err = s.RequestAPI(req, &timeline)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
|
|
|||
11
trends.go
11
trends.go
|
|
@ -1,8 +1,8 @@
|
|||
package twitterscraper
|
||||
|
||||
// GetTrends returns a list of trends.
|
||||
func GetTrends() ([]string, error) {
|
||||
req, err := newRequest("GET", "https://twitter.com/i/api/2/guide.json")
|
||||
func (s *Scraper) GetTrends() ([]string, error) {
|
||||
req, err := s.newRequest("GET", "https://twitter.com/i/api/2/guide.json")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
@ -15,7 +15,7 @@ func GetTrends() ([]string, error) {
|
|||
req.URL.RawQuery = q.Encode()
|
||||
|
||||
var jsn timeline
|
||||
err = requestAPI(req, &jsn)
|
||||
err = s.RequestAPI(req, &jsn)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
@ -27,3 +27,8 @@ func GetTrends() ([]string, error) {
|
|||
|
||||
return trends, nil
|
||||
}
|
||||
|
||||
// GetTrends is a wrapper that calls GetTrends on the default Scraper.
|
||||
func GetTrends() ([]string, error) {
|
||||
return defaultScraper.GetTrends()
|
||||
}
|
||||
|
|
|
|||
15
tweets.go
15
tweets.go
|
|
@ -6,22 +6,27 @@ import (
|
|||
)
|
||||
|
||||
// GetTweets returns channel with tweets for a given user.
|
||||
func (s *Scraper) GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *Result {
|
||||
return getTimeline(ctx, user, maxTweetsNbr, s.FetchTweets)
|
||||
}
|
||||
|
||||
// GetTweets is a wrapper that calls GetTweets on the default Scraper.
|
||||
func GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *Result {
|
||||
return getTimeline(ctx, user, maxTweetsNbr, FetchTweets)
|
||||
return defaultScraper.GetTweets(ctx, user, maxTweetsNbr)
|
||||
}
|
||||
|
||||
// FetchTweets gets tweets for a given user, via the Twitter frontend API.
|
||||
func FetchTweets(user string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) {
|
||||
func (s *Scraper) FetchTweets(user string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) {
|
||||
if maxTweetsNbr > 200 {
|
||||
maxTweetsNbr = 200
|
||||
}
|
||||
|
||||
userID, err := GetUserIDByScreenName(user)
|
||||
userID, err := s.GetUserIDByScreenName(user)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
req, err := newRequest("GET", "https://api.twitter.com/2/timeline/profile/"+userID+".json")
|
||||
req, err := s.newRequest("GET", "https://api.twitter.com/2/timeline/profile/"+userID+".json")
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
|
@ -35,7 +40,7 @@ func FetchTweets(user string, maxTweetsNbr int, cursor string) ([]*Tweet, string
|
|||
req.URL.RawQuery = q.Encode()
|
||||
|
||||
var timeline timeline
|
||||
err = requestAPI(req, &timeline)
|
||||
err = s.RequestAPI(req, &timeline)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
|
|
|||
42
util.go
42
util.go
|
|
@ -2,52 +2,14 @@ package twitterscraper
|
|||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
var (
|
||||
// IncludeReplies enable tweet reply
|
||||
IncludeReplies bool
|
||||
// HTTPProxy Public variable for Http proxy
|
||||
HTTPProxy *url.URL
|
||||
)
|
||||
|
||||
// SetProxy set http proxy format `http://HOST:PORT`
|
||||
func SetProxy(proxy string) error {
|
||||
if !strings.HasPrefix(proxy, "http://") {
|
||||
return errors.New("only support http protocol")
|
||||
}
|
||||
urlproxy, err := url.Parse(proxy)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
HTTPProxy = urlproxy
|
||||
return nil
|
||||
}
|
||||
|
||||
func newHTTPClient() *http.Client {
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
if HTTPProxy != nil {
|
||||
client = &http.Client{
|
||||
Transport: &http.Transport{
|
||||
Proxy: http.ProxyURL(HTTPProxy),
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: 10 * time.Second,
|
||||
}).DialContext,
|
||||
},
|
||||
}
|
||||
}
|
||||
return client
|
||||
}
|
||||
|
||||
func newRequest(method string, url string) (*http.Request, error) {
|
||||
func (s *Scraper) newRequest(method string, url string) (*http.Request, error) {
|
||||
req, err := http.NewRequest(method, url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
@ -75,7 +37,7 @@ func newRequest(method string, url string) (*http.Request, error) {
|
|||
q.Add("include_ext_media_availability", "true")
|
||||
q.Add("send_error_codes", "true")
|
||||
q.Add("simple_quoted_tweet", "true")
|
||||
q.Add("include_tweet_replies", strconv.FormatBool(IncludeReplies))
|
||||
q.Add("include_tweet_replies", strconv.FormatBool(s.includeReplies))
|
||||
q.Add("ext", "mediaStats,highlightedLabel")
|
||||
req.URL.RawQuery = q.Encode()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue