Replace GetProfile with a single API call and improve error handling so that private profiles can be fetched
This commit is contained in:
parent
1e048200bc
commit
d33882ff94
5 changed files with 126 additions and 16 deletions
11
api.go
11
api.go
|
|
@ -15,8 +15,12 @@ type user struct {
|
|||
Data struct {
|
||||
User struct {
|
||||
RestID string `json:"rest_id"`
|
||||
Legacy User `json:"legacy"`
|
||||
} `json:"user"`
|
||||
} `json:"data"`
|
||||
Errors []struct {
|
||||
Message string `json:"message"`
|
||||
} `json:"errors"`
|
||||
}
|
||||
|
||||
// Global cache for user IDs
|
||||
|
|
@ -40,7 +44,8 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
|
|||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
// Private profiles respond with 403 Forbidden but still return data, so 403 is not treated as a failure here.
|
||||
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusForbidden {
|
||||
return fmt.Errorf("response status %s", resp.Status)
|
||||
}
|
||||
|
||||
|
|
@ -104,6 +109,10 @@ func (s *Scraper) GetUserIDByScreenName(screenName string) (string, error) {
|
|||
return "", err
|
||||
}
|
||||
|
||||
if len(jsn.Errors) > 0 {
|
||||
return "", fmt.Errorf("%s", jsn.Errors[0].Message)
|
||||
}
|
||||
|
||||
if jsn.Data.User.RestID == "" {
|
||||
return "", fmt.Errorf("rest_id not found")
|
||||
}
|
||||
|
|
|
|||
2
go.mod
2
go.mod
|
|
@ -1,4 +1,4 @@
|
|||
module github.com/n0madic/twitter-scraper
|
||||
module github.com/dataxpe/twitter-scraper
|
||||
|
||||
go 1.13
|
||||
|
||||
|
|
|
|||
28
profile.go
28
profile.go
|
|
@ -2,6 +2,7 @@ package twitterscraper
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
|
|
@ -31,32 +32,31 @@ type Profile struct {
|
|||
|
||||
// GetProfile returns the parsed profile of the user with the given username.
|
||||
func (s *Scraper) GetProfile(username string) (Profile, error) {
|
||||
userID, err := s.GetUserIDByScreenName(username)
|
||||
var jsn user
|
||||
req, err := http.NewRequest("GET", "https://api.twitter.com/graphql/4S2ihIKfF3xhp-ENxvUAfQ/UserByScreenName?variables=%7B%22screen_name%22%3A%22"+username+"%22%2C%22withHighlightedLabel%22%3Atrue%7D", nil)
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
|
||||
req, err := s.newRequest("GET", "https://twitter.com/i/api/2/timeline/profile/"+userID+".json")
|
||||
err = s.RequestAPI(req, &jsn)
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
}
|
||||
|
||||
q := req.URL.Query()
|
||||
q.Add("count", "20")
|
||||
q.Add("userId", userID)
|
||||
req.URL.RawQuery = q.Encode()
|
||||
|
||||
var timeline timeline
|
||||
err = s.RequestAPI(req, &timeline)
|
||||
if err != nil {
|
||||
return Profile{}, err
|
||||
if len(jsn.Errors) > 0 {
|
||||
return Profile{}, fmt.Errorf("%s", jsn.Errors[0].Message)
|
||||
}
|
||||
|
||||
user, found := timeline.GlobalObjects.Users[userID]
|
||||
if !found {
|
||||
if jsn.Data.User.RestID == "" {
|
||||
return Profile{}, fmt.Errorf("rest_id not found")
|
||||
}
|
||||
|
||||
if jsn.Data.User.Legacy.Name == "" {
|
||||
return Profile{}, fmt.Errorf("either @%s does not exist or is private", username)
|
||||
}
|
||||
|
||||
user := jsn.Data.User.Legacy
|
||||
|
||||
profile := Profile{
|
||||
Avatar: user.ProfileImageURLHTTPS,
|
||||
Banner: user.ProfileBannerURL,
|
||||
|
|
@ -73,7 +73,7 @@ func (s *Scraper) GetProfile(username string) (Profile, error) {
|
|||
PinnedTweetIDs: user.PinnedTweetIdsStr,
|
||||
TweetsCount: user.StatusesCount,
|
||||
URL: "https://twitter.com/" + user.ScreenName,
|
||||
UserID: user.IDStr,
|
||||
UserID: jsn.Data.User.RestID,
|
||||
Username: user.ScreenName,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -58,3 +58,77 @@ func TestGetProfile(t *testing.T) {
|
|||
t.Error("Expected TweetsCount is greater than zero")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetProfilePrivate(t *testing.T) {
|
||||
loc := time.FixedZone("UTC", 0)
|
||||
joined := time.Date(2009, 8, 12, 6, 18, 29, 0, loc)
|
||||
sample := Profile{
|
||||
Avatar: "https://pbs.twimg.com/profile_images/1352282054256324610/_v3nslbW_normal.jpg",
|
||||
Banner: "https://pbs.twimg.com/profile_banners/64958707/1551520603",
|
||||
Biography: "",
|
||||
// Birthday: "March 21",
|
||||
IsPrivate: true,
|
||||
IsVerified: false,
|
||||
Joined: &joined,
|
||||
Location: "",
|
||||
Name: "saidah.jpg",
|
||||
PinnedTweetIDs: []string{},
|
||||
URL: "https://twitter.com/sdhftrh",
|
||||
UserID: "64958707",
|
||||
Username: "sdhftrh",
|
||||
Website: "https://youtu.be/0liuo2Q4bGo",
|
||||
}
|
||||
|
||||
// some random private profile (found via google)
|
||||
profile, err := GetProfile("sdhftrh")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
cmpOptions := cmp.Options{
|
||||
cmpopts.IgnoreFields(Profile{}, "FollowersCount"),
|
||||
cmpopts.IgnoreFields(Profile{}, "FollowingCount"),
|
||||
cmpopts.IgnoreFields(Profile{}, "FriendsCount"),
|
||||
cmpopts.IgnoreFields(Profile{}, "LikesCount"),
|
||||
cmpopts.IgnoreFields(Profile{}, "ListedCount"),
|
||||
cmpopts.IgnoreFields(Profile{}, "TweetsCount"),
|
||||
}
|
||||
if diff := cmp.Diff(sample, profile, cmpOptions...); diff != "" {
|
||||
t.Error("Resulting profile does not match the sample", diff)
|
||||
}
|
||||
|
||||
if profile.FollowersCount == 0 {
|
||||
t.Error("Expected FollowersCount is greater than zero")
|
||||
}
|
||||
if profile.FollowingCount == 0 {
|
||||
t.Error("Expected FollowingCount is greater than zero")
|
||||
}
|
||||
if profile.LikesCount == 0 {
|
||||
t.Error("Expected LikesCount is greater than zero")
|
||||
}
|
||||
if profile.TweetsCount == 0 {
|
||||
t.Error("Expected TweetsCount is greater than zero")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetProfileErrorSuspended(t *testing.T) {
|
||||
_, err := GetProfile("123")
|
||||
if err == nil {
|
||||
t.Error("Expected Error, got success")
|
||||
} else {
|
||||
if err.Error() != "Authorization: User has been suspended. (63)" {
|
||||
t.Errorf("Expected error 'Authorization: User has been suspended. (63)', got '%s'", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetProfileErrorNotFound(t *testing.T) {
|
||||
_, err := GetProfile("sample3123131")
|
||||
if err == nil {
|
||||
t.Error("Expected Error, got success")
|
||||
} else {
|
||||
if err.Error() != "Not found" {
|
||||
t.Errorf("Expected error 'Not found', got '%s'", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
27
types.go
27
types.go
|
|
@ -39,6 +39,33 @@ type (
|
|||
Error error
|
||||
}
|
||||
|
||||
// User type.
|
||||
User struct {
|
||||
CreatedAt string `json:"created_at"`
|
||||
Description string `json:"description"`
|
||||
Entities struct {
|
||||
URL struct {
|
||||
Urls []struct {
|
||||
ExpandedURL string `json:"expanded_url"`
|
||||
} `json:"urls"`
|
||||
} `json:"url"`
|
||||
} `json:"entities"`
|
||||
FavouritesCount int `json:"favourites_count"`
|
||||
FollowersCount int `json:"followers_count"`
|
||||
FriendsCount int `json:"friends_count"`
|
||||
IDStr string `json:"id_str"`
|
||||
ListedCount int `json:"listed_count"`
|
||||
Name string `json:"name"`
|
||||
Location string `json:"location"`
|
||||
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"`
|
||||
ProfileBannerURL string `json:"profile_banner_url"`
|
||||
ProfileImageURLHTTPS string `json:"profile_image_url_https"`
|
||||
Protected bool `json:"protected"`
|
||||
ScreenName string `json:"screen_name"`
|
||||
StatusesCount int `json:"statuses_count"`
|
||||
Verified bool `json:"verified"`
|
||||
}
|
||||
|
||||
// timeline JSON
|
||||
timeline struct {
|
||||
GlobalObjects struct {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue