2020-12-11 20:58:49 +02:00
|
|
|
package twitterscraper
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"encoding/json"
|
|
|
|
|
"fmt"
|
|
|
|
|
"io/ioutil"
|
|
|
|
|
"net/http"
|
2021-01-05 14:21:08 +02:00
|
|
|
"time"
|
2020-12-11 20:58:49 +02:00
|
|
|
)
|
|
|
|
|
|
2022-04-17 23:38:54 +03:00
|
|
|
// bearerToken is the credential placed after "Bearer " in the Authorization
// header by both RequestAPI and GetGuestToken. It is kept in percent-encoded
// form (note the %2F and %3D escapes) and is sent verbatim, without decoding.
const bearerToken string = "AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw"
|
2020-12-11 20:58:49 +02:00
|
|
|
|
2020-12-12 23:33:57 +02:00
|
|
|
// RequestAPI get JSON from frontend API and decodes it
|
|
|
|
|
func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
|
2021-07-16 13:52:22 +03:00
|
|
|
s.wg.Wait()
|
|
|
|
|
if s.delay > 0 {
|
|
|
|
|
defer func() {
|
|
|
|
|
s.wg.Add(1)
|
|
|
|
|
go func() {
|
|
|
|
|
time.Sleep(time.Second * time.Duration(s.delay))
|
|
|
|
|
s.wg.Done()
|
|
|
|
|
}()
|
|
|
|
|
}()
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-07 10:18:01 +02:00
|
|
|
if !s.IsGuestToken() || s.guestCreatedAt.Before(time.Now().Add(-time.Hour*3)) {
|
2020-12-12 23:33:57 +02:00
|
|
|
err := s.GetGuestToken()
|
2020-12-11 20:58:49 +02:00
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
req.Header.Set("Authorization", "Bearer "+bearerToken)
|
2020-12-12 23:33:57 +02:00
|
|
|
req.Header.Set("X-Guest-Token", s.guestToken)
|
2020-12-11 20:58:49 +02:00
|
|
|
|
2021-09-08 16:02:53 +08:00
|
|
|
// use cookie
|
2021-09-09 11:15:53 +08:00
|
|
|
if len(s.cookie) > 0 && len(s.xCsrfToken) > 0 {
|
|
|
|
|
req.Header.Set("Cookie", s.cookie)
|
|
|
|
|
req.Header.Set("x-csrf-token", s.xCsrfToken)
|
2021-09-08 16:02:53 +08:00
|
|
|
}
|
|
|
|
|
|
2020-12-12 23:33:57 +02:00
|
|
|
resp, err := s.client.Do(req)
|
2020-12-11 20:58:49 +02:00
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
defer resp.Body.Close()
|
|
|
|
|
|
2021-01-25 10:31:41 +07:00
|
|
|
// private profiles return forbidden, but also data
|
|
|
|
|
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusForbidden {
|
2021-04-22 21:38:49 +03:00
|
|
|
content, _ := ioutil.ReadAll(resp.Body)
|
|
|
|
|
return fmt.Errorf("response status %s: %s", resp.Status, content)
|
2021-01-05 11:42:51 +02:00
|
|
|
}
|
|
|
|
|
|
2021-01-05 15:15:27 +02:00
|
|
|
if resp.Header.Get("X-Rate-Limit-Remaining") == "0" {
|
|
|
|
|
s.guestToken = ""
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-11 20:58:49 +02:00
|
|
|
return json.NewDecoder(resp.Body).Decode(target)
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-12 23:33:57 +02:00
|
|
|
// GetGuestToken from Twitter API
|
|
|
|
|
func (s *Scraper) GetGuestToken() error {
|
2020-12-11 20:58:49 +02:00
|
|
|
req, err := http.NewRequest("POST", "https://api.twitter.com/1.1/guest/activate.json", nil)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
req.Header.Set("Authorization", "Bearer "+bearerToken)
|
|
|
|
|
|
2020-12-12 23:33:57 +02:00
|
|
|
resp, err := s.client.Do(req)
|
2020-12-11 20:58:49 +02:00
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
defer resp.Body.Close()
|
|
|
|
|
|
|
|
|
|
body, err := ioutil.ReadAll(resp.Body)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
2022-04-09 20:00:09 +03:00
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
|
|
|
return fmt.Errorf("response status %s: %s", resp.Status, body)
|
|
|
|
|
}
|
2020-12-11 20:58:49 +02:00
|
|
|
|
|
|
|
|
var jsn map[string]interface{}
|
|
|
|
|
if err := json.Unmarshal(body, &jsn); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
var ok bool
|
2020-12-12 23:33:57 +02:00
|
|
|
if s.guestToken, ok = jsn["guest_token"].(string); !ok {
|
2020-12-11 20:58:49 +02:00
|
|
|
return fmt.Errorf("guest_token not found")
|
|
|
|
|
}
|
2021-01-05 14:21:08 +02:00
|
|
|
s.guestCreatedAt = time.Now()
|
2020-12-11 20:58:49 +02:00
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
}
|