parent
5b0c8c3f9f
commit
07ad3789ef
9 changed files with 274 additions and 147 deletions
149
README.md
149
README.md
|
|
@ -16,6 +16,75 @@ go get -u github.com/n0madic/twitter-scraper
|
|||
|
||||
## Usage
|
||||
|
||||
### Authentication
|
||||
|
||||
Authentication is required for all methods now!
|
||||
|
||||
#### Login
|
||||
|
||||
```golang
|
||||
err := scraper.Login("username", "password")
|
||||
```
|
||||
|
||||
Use your username to log in, not your email address!
|
||||
If your account requires email confirmation, pass the email address as an additional argument:
|
||||
|
||||
```golang
|
||||
err := scraper.Login("username", "password", "email")
|
||||
```
|
||||
|
||||
If you have two-factor authentication enabled, pass the code as an additional argument:
|
||||
|
||||
```golang
|
||||
err := scraper.Login("username", "password", "code")
|
||||
```
|
||||
|
||||
Status of login can be checked with:
|
||||
|
||||
```golang
|
||||
scraper.IsLoggedIn()
|
||||
```
|
||||
|
||||
Logout (clear session):
|
||||
|
||||
```golang
|
||||
scraper.Logout()
|
||||
```
|
||||
|
||||
If you want to save the session between restarts, you can save cookies with `scraper.GetCookies()` and restore them with `scraper.SetCookies()`.
|
||||
|
||||
For example, save cookies:
|
||||
|
||||
```golang
|
||||
cookies := scraper.GetCookies()
|
||||
// serialize to JSON
|
||||
js, _ := json.Marshal(cookies)
|
||||
// save to file
|
||||
f, _ := os.Create("cookies.json")
|
||||
f.Write(js)
|
||||
```
|
||||
|
||||
and load cookies:
|
||||
|
||||
```golang
|
||||
f, _ := os.Open("cookies.json")
|
||||
// deserialize from JSON
|
||||
var cookies []*http.Cookie
|
||||
json.NewDecoder(f).Decode(&cookies)
|
||||
// load cookies
|
||||
scraper.SetCookies(cookies)
|
||||
// check login status
|
||||
scraper.IsLoggedIn()
|
||||
```
|
||||
|
||||
#### Open account
|
||||
|
||||
If you don't want to use your account, you can try logging in as a Twitter app:
|
||||
|
||||
```golang
|
||||
err := scraper.LoginOpenAccount()
|
||||
```
|
||||
|
||||
### Get user tweets
|
||||
|
||||
```golang
|
||||
|
|
@ -29,7 +98,10 @@ import (
|
|||
|
||||
func main() {
|
||||
scraper := twitterscraper.New()
|
||||
|
||||
err := scraper.LoginOpenAccount()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
for tweet := range scraper.GetTweets(context.Background(), "Twitter", 50) {
|
||||
if tweet.Error != nil {
|
||||
panic(tweet.Error)
|
||||
|
|
@ -54,6 +126,10 @@ import (
|
|||
|
||||
func main() {
|
||||
scraper := twitterscraper.New()
|
||||
err := scraper.LoginOpenAccount()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
tweet, err := scraper.GetTweet("1328684389388185600")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
|
|
@ -124,6 +200,7 @@ import (
|
|||
|
||||
func main() {
|
||||
scraper := twitterscraper.New()
|
||||
scraper.LoginOpenAccount()
|
||||
profile, err := scraper.GetProfile("Twitter")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
|
|
@ -178,76 +255,6 @@ func main() {
|
|||
}
|
||||
```
|
||||
|
||||
### Use authentication
|
||||
|
||||
Some specified user tweets are protected that you must login and follow.
|
||||
It is also required to search.
|
||||
|
||||
#### Login
|
||||
|
||||
```golang
|
||||
err := scraper.Login("username", "password")
|
||||
```
|
||||
|
||||
Use username to login, not email!
|
||||
But if you have email confirmation, use email address in addition:
|
||||
|
||||
```golang
|
||||
err := scraper.Login("username", "password", "email")
|
||||
```
|
||||
|
||||
If you have two-factor authentication, use code:
|
||||
|
||||
```golang
|
||||
err := scraper.Login("username", "password", "code")
|
||||
```
|
||||
|
||||
Status of login can be checked with:
|
||||
|
||||
```golang
|
||||
scraper.IsLoggedIn()
|
||||
```
|
||||
|
||||
Logout (clear session):
|
||||
|
||||
```golang
|
||||
scraper.Logout()
|
||||
```
|
||||
|
||||
If you want save session between restarts, you can save cookies with `scraper.GetCookies()` and restore with `scraper.SetCookies()`.
|
||||
|
||||
For example, save cookies:
|
||||
|
||||
```golang
|
||||
cookies := scraper.GetCookies()
|
||||
// serialize to JSON
|
||||
js, _ := json.Marshal(cookies)
|
||||
// save to file
|
||||
f, _ = os.Create("cookies.json")
|
||||
f.Write(js)
|
||||
```
|
||||
|
||||
and load cookies:
|
||||
|
||||
```golang
|
||||
f, _ := os.Open("cookies.json")
|
||||
// deserialize from JSON
|
||||
var cookies []*http.Cookie
|
||||
json.NewDecoder(f).Decode(&cookies)
|
||||
// load cookies
|
||||
scraper.SetCookies(cookies)
|
||||
// check login status
|
||||
scraper.IsLoggedIn()
|
||||
```
|
||||
|
||||
#### Open account
|
||||
|
||||
If you don't want to use your account, you can login as a Twitter app:
|
||||
|
||||
```golang
|
||||
err := scraper.LoginOpenAccount()
|
||||
```
|
||||
|
||||
### Use Proxy
|
||||
|
||||
Supports HTTP(S) and SOCKS5 proxies.
|
||||
|
|
|
|||
3
auth.go
3
auth.go
|
|
@ -289,6 +289,7 @@ func (s *Scraper) Login(credentials ...string) error {
|
|||
}
|
||||
|
||||
s.isLogged = true
|
||||
s.isOpenAccount = false
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
@ -339,6 +340,7 @@ func (s *Scraper) LoginOpenAccount() error {
|
|||
s.oAuthToken = info.Subtasks[0].OpenAccount.OAuthToken
|
||||
s.oAuthSecret = info.Subtasks[0].OpenAccount.OAuthTokenSecret
|
||||
s.isLogged = true
|
||||
s.isOpenAccount = true
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
|
@ -356,6 +358,7 @@ func (s *Scraper) Logout() error {
|
|||
}
|
||||
|
||||
s.isLogged = false
|
||||
s.isOpenAccount = false
|
||||
s.guestToken = ""
|
||||
s.oAuthToken = ""
|
||||
s.oAuthSecret = ""
|
||||
|
|
|
|||
|
|
@ -31,6 +31,10 @@ func TestGetProfile(t *testing.T) {
|
|||
}
|
||||
|
||||
scraper := twitterscraper.New()
|
||||
err := scraper.LoginOpenAccount()
|
||||
if err != nil {
|
||||
t.Fatalf("LoginOpenAccount() error = %v", err)
|
||||
}
|
||||
profile, err := scraper.GetProfile("nomadic_ua")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
|
|
@ -83,6 +87,10 @@ func TestGetProfilePrivate(t *testing.T) {
|
|||
}
|
||||
|
||||
scraper := twitterscraper.New()
|
||||
err := scraper.LoginOpenAccount()
|
||||
if err != nil {
|
||||
t.Fatalf("LoginOpenAccount() error = %v", err)
|
||||
}
|
||||
// some random private profile (found via google)
|
||||
profile, err := scraper.GetProfile("tomdumont")
|
||||
if err != nil {
|
||||
|
|
@ -114,7 +122,11 @@ func TestGetProfilePrivate(t *testing.T) {
|
|||
|
||||
func TestGetProfileErrorSuspended(t *testing.T) {
|
||||
scraper := twitterscraper.New()
|
||||
_, err := scraper.GetProfile("123")
|
||||
err := scraper.LoginOpenAccount()
|
||||
if err != nil {
|
||||
t.Fatalf("LoginOpenAccount() error = %v", err)
|
||||
}
|
||||
_, err = scraper.GetProfile("123")
|
||||
if err == nil {
|
||||
t.Error("Expected Error, got success")
|
||||
} else {
|
||||
|
|
@ -128,7 +140,11 @@ func TestGetProfileErrorNotFound(t *testing.T) {
|
|||
neUser := "sample3123131"
|
||||
expectedError := fmt.Sprintf("User '%s' not found", neUser)
|
||||
scraper := twitterscraper.New()
|
||||
_, err := scraper.GetProfile(neUser)
|
||||
err := scraper.LoginOpenAccount()
|
||||
if err != nil {
|
||||
t.Fatalf("LoginOpenAccount() error = %v", err)
|
||||
}
|
||||
_, err = scraper.GetProfile(neUser)
|
||||
if err == nil {
|
||||
t.Error("Expected Error, got success")
|
||||
} else {
|
||||
|
|
@ -140,6 +156,10 @@ func TestGetProfileErrorNotFound(t *testing.T) {
|
|||
|
||||
func TestGetUserIDByScreenName(t *testing.T) {
|
||||
scraper := twitterscraper.New()
|
||||
err := scraper.LoginOpenAccount()
|
||||
if err != nil {
|
||||
t.Fatalf("LoginOpenAccount() error = %v", err)
|
||||
}
|
||||
userID, err := scraper.GetUserIDByScreenName("Twitter")
|
||||
if err != nil {
|
||||
t.Errorf("getUserByScreenName() error = %v", err)
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ type Scraper struct {
|
|||
guestCreatedAt time.Time
|
||||
includeReplies bool
|
||||
isLogged bool
|
||||
isOpenAccount bool
|
||||
oAuthToken string
|
||||
oAuthSecret string
|
||||
proxy string
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package twitterscraper_test
|
|||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
twitterscraper "github.com/n0madic/twitter-scraper"
|
||||
|
|
@ -11,9 +10,6 @@ import (
|
|||
var searchScraper = twitterscraper.New()
|
||||
|
||||
func TestFetchSearchCursor(t *testing.T) {
|
||||
if os.Getenv("SKIP_AUTH_TEST") != "" {
|
||||
t.Skip("Skipping test due to environment variable")
|
||||
}
|
||||
err := searchScraper.LoginOpenAccount()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
|
|
|||
|
|
@ -89,6 +89,7 @@ func (timeline *timelineV1) parseTweet(id string) *Tweet {
|
|||
name := timeline.GlobalObjects.Users[tweet.UserIDStr].Name
|
||||
tw := &Tweet{
|
||||
ID: id,
|
||||
ConversationID: tweet.ConversationIDStr,
|
||||
Likes: tweet.FavoriteCount,
|
||||
Name: name,
|
||||
PermanentURL: fmt.Sprintf("https://twitter.com/%s/status/%s", username, id),
|
||||
|
|
@ -125,6 +126,10 @@ func (timeline *timelineV1) parseTweet(id string) *Tweet {
|
|||
tw.RetweetedStatusID = tweet.RetweetedStatusIDStr
|
||||
}
|
||||
|
||||
if tweet.SelfThread.IDStr == id {
|
||||
tw.IsSelfThread = true
|
||||
}
|
||||
|
||||
if tweet.Views.Count != "" {
|
||||
views, viewsErr := strconv.Atoi(tweet.Views.Count)
|
||||
if viewsErr != nil {
|
||||
|
|
|
|||
53
tweets.go
53
tweets.go
|
|
@ -4,6 +4,7 @@ import (
|
|||
"context"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// GetTweets returns channel with tweets for a given user.
|
||||
|
|
@ -18,6 +19,9 @@ func (s *Scraper) FetchTweets(user string, maxTweetsNbr int, cursor string) ([]*
|
|||
return nil, "", err
|
||||
}
|
||||
|
||||
if s.isOpenAccount {
|
||||
return s.FetchTweetsByUserIDLegacy(userID, maxTweetsNbr, cursor)
|
||||
}
|
||||
return s.FetchTweetsByUserID(userID, maxTweetsNbr, cursor)
|
||||
}
|
||||
|
||||
|
|
@ -83,8 +87,56 @@ func (s *Scraper) FetchTweetsByUserID(userID string, maxTweetsNbr int, cursor st
|
|||
return tweets, nextCursor, nil
|
||||
}
|
||||
|
||||
// FetchTweetsByUserIDLegacy gets tweets for a given userID, via the Twitter frontend legacy API.
//
// It issues a GET request to the /2/timeline/profile/<userID>.json endpoint,
// decodes the response into a timelineV1 value and returns the parsed tweets
// together with the cursor for the next page. maxTweetsNbr is capped at 200
// before being sent as the "count" query parameter; cursor is only sent when
// it is non-empty. Any error from building or executing the request is
// returned unchanged, with a nil slice and an empty cursor.
|
||||
func (s *Scraper) FetchTweetsByUserIDLegacy(userID string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) {
|
||||
// Clamp the requested page size to 200.
if maxTweetsNbr > 200 {
|
||||
maxTweetsNbr = 200
|
||||
}
|
||||
|
||||
req, err := s.newRequest("GET", "https://api.twitter.com/2/timeline/profile/"+userID+".json")
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
// Add the paging parameters on top of whatever query newRequest already set.
q := req.URL.Query()
|
||||
q.Add("count", strconv.Itoa(maxTweetsNbr))
|
||||
q.Add("userId", userID)
|
||||
// An empty cursor means "first page", so the parameter is omitted.
if cursor != "" {
|
||||
q.Add("cursor", cursor)
|
||||
}
|
||||
req.URL.RawQuery = q.Encode()
|
||||
|
||||
var timeline timelineV1
|
||||
err = s.RequestAPI(req, &timeline)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
tweets, nextCursor := timeline.parseTweets()
|
||||
return tweets, nextCursor, nil
|
||||
}
|
||||
|
||||
// GetTweet get a single tweet by ID.
|
||||
func (s *Scraper) GetTweet(id string) (*Tweet, error) {
|
||||
if s.isOpenAccount {
|
||||
req, err := s.newRequest("GET", "https://api.twitter.com/2/timeline/conversation/"+id+".json")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var timeline timelineV1
|
||||
err = s.RequestAPI(req, &timeline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tweets, _ := timeline.parseTweets()
|
||||
for _, tweet := range tweets {
|
||||
if tweet.ID == id {
|
||||
return tweet, nil
|
||||
}
|
||||
}
|
||||
} else {
|
||||
req, err := s.newRequest("GET", "https://twitter.com/i/api/graphql/VWFGPVAGkZMGRKGe3GFFnA/TweetDetail")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
@ -153,5 +205,6 @@ func (s *Scraper) GetTweet(id string) (*Tweet, error) {
|
|||
return tweet, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("tweet with ID %s not found", id)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package twitterscraper_test
|
|||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
|
|
@ -21,6 +22,10 @@ func TestGetTweets(t *testing.T) {
|
|||
maxTweetsNbr := 300
|
||||
dupcheck := make(map[string]bool)
|
||||
scraper := twitterscraper.New()
|
||||
err := scraper.LoginOpenAccount()
|
||||
if err != nil {
|
||||
t.Fatalf("LoginOpenAccount() error = %v", err)
|
||||
}
|
||||
for tweet := range scraper.GetTweets(context.Background(), "Twitter", maxTweetsNbr) {
|
||||
if tweet.Error != nil {
|
||||
t.Error(tweet.Error)
|
||||
|
|
@ -73,6 +78,10 @@ func TestGetTweets(t *testing.T) {
|
|||
|
||||
func assertGetTweet(t *testing.T, expectedTweet *twitterscraper.Tweet) {
|
||||
scraper := twitterscraper.New()
|
||||
err := scraper.LoginOpenAccount()
|
||||
if err != nil {
|
||||
t.Fatalf("LoginOpenAccount() error = %v", err)
|
||||
}
|
||||
actualTweet, err := scraper.GetTweet(expectedTweet.ID)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
|
|
@ -124,6 +133,9 @@ func TestGetTweetWithMultiplePhotos(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestGetTweetWithGIF(t *testing.T) {
|
||||
if os.Getenv("SKIP_AUTH_TEST") != "" {
|
||||
t.Skip("Skipping test due to environment variable")
|
||||
}
|
||||
expectedTweet := twitterscraper.Tweet{
|
||||
ConversationID: "1288540609310056450",
|
||||
GIFs: []twitterscraper.GIF{
|
||||
|
|
@ -148,6 +160,9 @@ func TestGetTweetWithGIF(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestGetTweetWithPhotoAndGIF(t *testing.T) {
|
||||
if os.Getenv("SKIP_AUTH_TEST") != "" {
|
||||
t.Skip("Skipping test due to environment variable")
|
||||
}
|
||||
expectedTweet := twitterscraper.Tweet{
|
||||
ConversationID: "1580661436132757506",
|
||||
GIFs: []twitterscraper.GIF{
|
||||
|
|
@ -178,6 +193,10 @@ func TestTweetMentions(t *testing.T) {
|
|||
Name: "David McRaney",
|
||||
}}
|
||||
scraper := twitterscraper.New()
|
||||
err := scraper.LoginOpenAccount()
|
||||
if err != nil {
|
||||
t.Fatalf("LoginOpenAccount() error = %v", err)
|
||||
}
|
||||
tweet, err := scraper.GetTweet("1554522888904101890")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
|
|
@ -210,6 +229,10 @@ func TestQuotedAndReply(t *testing.T) {
|
|||
Username: "VsauceTwo",
|
||||
}
|
||||
scraper := twitterscraper.New()
|
||||
err := scraper.LoginOpenAccount()
|
||||
if err != nil {
|
||||
t.Fatalf("LoginOpenAccount() error = %v", err)
|
||||
}
|
||||
tweet, err := scraper.GetTweet("1237110897597976576")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
|
|
@ -239,6 +262,7 @@ func TestRetweet(t *testing.T) {
|
|||
ConversationID: "1359151057872580612",
|
||||
HTML: "We’ve seen an increase in attacks against Asian communities and individuals around the world. It’s important to know that this isn’t new; throughout history, Asians have experienced violence and exclusion. However, their diverse lived experiences have largely been overlooked.",
|
||||
ID: "1359151057872580612",
|
||||
IsSelfThread: true,
|
||||
Likes: 6683,
|
||||
Name: "Twitter Together",
|
||||
PermanentURL: "https://twitter.com/TwitterTogether/status/1359151057872580612",
|
||||
|
|
@ -251,6 +275,10 @@ func TestRetweet(t *testing.T) {
|
|||
Username: "TwitterTogether",
|
||||
}
|
||||
scraper := twitterscraper.New()
|
||||
err := scraper.LoginOpenAccount()
|
||||
if err != nil {
|
||||
t.Fatalf("LoginOpenAccount() error = %v", err)
|
||||
}
|
||||
tweet, err := scraper.GetTweet("1362849141248974853")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
|
|
@ -281,6 +309,10 @@ func TestTweetViews(t *testing.T) {
|
|||
Views: 3189278,
|
||||
}
|
||||
scraper := twitterscraper.New()
|
||||
err := scraper.LoginOpenAccount()
|
||||
if err != nil {
|
||||
t.Fatalf("LoginOpenAccount() error = %v", err)
|
||||
}
|
||||
tweet, err := scraper.GetTweet("1606055187348688896")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
|
|
@ -292,7 +324,14 @@ func TestTweetViews(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestTweetThread(t *testing.T) {
|
||||
if os.Getenv("SKIP_AUTH_TEST") != "" {
|
||||
t.Skip("Skipping test due to environment variable")
|
||||
}
|
||||
scraper := twitterscraper.New()
|
||||
err := scraper.Login(username, password)
|
||||
if err != nil {
|
||||
t.Fatalf("Login() error = %v", err)
|
||||
}
|
||||
tweet, err := scraper.GetTweet("1665602315745673217")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
|
|
|||
3
types.go
3
types.go
|
|
@ -133,6 +133,9 @@ type (
|
|||
Result *result `json:"result"`
|
||||
} `json:"retweeted_status_result"`
|
||||
QuotedStatusIDStr string `json:"quoted_status_id_str"`
|
||||
SelfThread struct {
|
||||
IDStr string `json:"id_str"`
|
||||
} `json:"self_thread"`
|
||||
Time time.Time `json:"time"`
|
||||
UserIDStr string `json:"user_id_str"`
|
||||
Views struct {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue