2020-12-11 20:58:49 +02:00
|
|
|
package twitterscraper
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"encoding/json"
|
|
|
|
|
"fmt"
|
|
|
|
|
"io/ioutil"
|
|
|
|
|
"net/http"
|
|
|
|
|
"sync"
|
2021-01-05 14:21:08 +02:00
|
|
|
"time"
|
2020-12-11 20:58:49 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// bearerToken is the static token sent as "Authorization: Bearer <token>" on
// the requests issued by RequestAPI and GetGuestToken.
const bearerToken string = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
|
|
|
|
|
|
|
|
|
|
// user is the JSON decoding target for a document that carries one user
// object under "data" -> "user" and a top-level "errors" array. Every field is
// mapped to the JSON key named in its struct tag; keys absent from the input
// leave the corresponding field at its zero value.
type user struct {
	Data struct {
		User struct {
			// RestID is read from "rest_id".
			RestID string `json:"rest_id"`
			// Legacy groups everything nested under the "legacy" key.
			Legacy struct {
				// CreatedAt is stored as the raw string; no time parsing happens here.
				CreatedAt   string `json:"created_at"`
				Description string `json:"description"`
				// Entities only models the "url" -> "urls" -> "expanded_url" path.
				Entities struct {
					URL struct {
						Urls []struct {
							ExpandedURL string `json:"expanded_url"`
						} `json:"urls"`
					} `json:"url"`
				} `json:"entities"`
				FavouritesCount      int      `json:"favourites_count"`
				FollowersCount       int      `json:"followers_count"`
				FriendsCount         int      `json:"friends_count"`
				IDStr                string   `json:"id_str"`
				ListedCount          int      `json:"listed_count"`
				Name                 string   `json:"name"`
				Location             string   `json:"location"`
				PinnedTweetIdsStr    []string `json:"pinned_tweet_ids_str"`
				ProfileBannerURL     string   `json:"profile_banner_url"`
				ProfileImageURLHTTPS string   `json:"profile_image_url_https"`
				Protected            bool     `json:"protected"`
				ScreenName           string   `json:"screen_name"`
				StatusesCount        int      `json:"statuses_count"`
				Verified             bool     `json:"verified"`
			} `json:"legacy"`
		} `json:"user"`
	} `json:"data"`
	// Errors holds the entries of the top-level "errors" array; only the
	// "message" member of each entry is kept.
	Errors []struct {
		Message string `json:"message"`
	} `json:"errors"`
}
|
|
|
|
|
|
2020-12-12 23:33:57 +02:00
|
|
|
// Global cache for user IDs
|
|
|
|
|
// cacheIDs is a sync.Map, which is safe for concurrent use by multiple
// goroutines without additional locking. Its key and value types are not
// visible in this part of the file.
var cacheIDs sync.Map
|
2020-12-11 20:58:49 +02:00
|
|
|
|
2020-12-12 23:33:57 +02:00
|
|
|
// RequestAPI get JSON from frontend API and decodes it
|
|
|
|
|
func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
|
2021-01-05 14:21:08 +02:00
|
|
|
if s.guestToken == "" || s.guestCreatedAt.Before(time.Now().Add(-time.Hour*3)) {
|
2020-12-12 23:33:57 +02:00
|
|
|
err := s.GetGuestToken()
|
2020-12-11 20:58:49 +02:00
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
req.Header.Set("Authorization", "Bearer "+bearerToken)
|
2020-12-12 23:33:57 +02:00
|
|
|
req.Header.Set("X-Guest-Token", s.guestToken)
|
2020-12-11 20:58:49 +02:00
|
|
|
|
2020-12-12 23:33:57 +02:00
|
|
|
resp, err := s.client.Do(req)
|
2020-12-11 20:58:49 +02:00
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
defer resp.Body.Close()
|
|
|
|
|
|
2021-01-25 10:31:41 +07:00
|
|
|
// private profiles return forbidden, but also data
|
|
|
|
|
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusForbidden {
|
2021-01-05 11:42:51 +02:00
|
|
|
return fmt.Errorf("response status %s", resp.Status)
|
|
|
|
|
}
|
|
|
|
|
|
2021-01-05 15:15:27 +02:00
|
|
|
if resp.Header.Get("X-Rate-Limit-Remaining") == "0" {
|
|
|
|
|
s.guestToken = ""
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-11 20:58:49 +02:00
|
|
|
return json.NewDecoder(resp.Body).Decode(target)
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-12 23:33:57 +02:00
|
|
|
// GetGuestToken from Twitter API
|
|
|
|
|
func (s *Scraper) GetGuestToken() error {
|
2020-12-11 20:58:49 +02:00
|
|
|
req, err := http.NewRequest("POST", "https://api.twitter.com/1.1/guest/activate.json", nil)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
req.Header.Set("Authorization", "Bearer "+bearerToken)
|
|
|
|
|
|
2020-12-12 23:33:57 +02:00
|
|
|
resp, err := s.client.Do(req)
|
2020-12-11 20:58:49 +02:00
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
defer resp.Body.Close()
|
|
|
|
|
|
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
|
|
|
return fmt.Errorf("response status %s", resp.Status)
|
|
|
|
|
}
|
|
|
|
|
body, err := ioutil.ReadAll(resp.Body)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var jsn map[string]interface{}
|
|
|
|
|
if err := json.Unmarshal(body, &jsn); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
var ok bool
|
2020-12-12 23:33:57 +02:00
|
|
|
if s.guestToken, ok = jsn["guest_token"].(string); !ok {
|
2020-12-11 20:58:49 +02:00
|
|
|
return fmt.Errorf("guest_token not found")
|
|
|
|
|
}
|
2021-01-05 14:21:08 +02:00
|
|
|
s.guestCreatedAt = time.Now()
|
2020-12-11 20:58:49 +02:00
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
}
|