Replace the two-step GetProfile lookup with a single API call and improve error handling so that private profiles can be fetched

This commit is contained in:
Michael 2021-01-25 10:31:41 +07:00
parent 1e048200bc
commit d33882ff94
5 changed files with 126 additions and 16 deletions

11
api.go
View file

@ -15,8 +15,12 @@ type user struct {
// Data wraps the "data" object of the response.
Data struct {
// User is the "user" object nested inside "data".
User struct {
// RestID is decoded from "rest_id"; callers treat an empty value as "rest_id not found".
RestID string `json:"rest_id"`
// Legacy holds the profile fields decoded from the "legacy" object.
Legacy User `json:"legacy"`
} `json:"user"`
} `json:"data"`
// Errors is decoded from the top-level "errors" array; callers return the
// first entry's Message as an error when the slice is non-empty.
Errors []struct {
Message string `json:"message"`
} `json:"errors"`
}
// Global cache for user IDs
@ -40,7 +44,8 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
// Private profiles respond with 403 Forbidden but still include data in the body, so that status is accepted as well.
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusForbidden {
return fmt.Errorf("response status %s", resp.Status)
}
@ -104,6 +109,10 @@ func (s *Scraper) GetUserIDByScreenName(screenName string) (string, error) {
return "", err
}
if len(jsn.Errors) > 0 {
return "", fmt.Errorf("%s", jsn.Errors[0].Message)
}
if jsn.Data.User.RestID == "" {
return "", fmt.Errorf("rest_id not found")
}

2
go.mod
View file

@ -1,4 +1,4 @@
module github.com/n0madic/twitter-scraper
module github.com/dataxpe/twitter-scraper
go 1.13

View file

@ -2,6 +2,7 @@ package twitterscraper
import (
"fmt"
"net/http"
"time"
)
@ -31,32 +32,31 @@ type Profile struct {
// GetProfile return parsed user profile.
func (s *Scraper) GetProfile(username string) (Profile, error) {
userID, err := s.GetUserIDByScreenName(username)
var jsn user
req, err := http.NewRequest("GET", "https://api.twitter.com/graphql/4S2ihIKfF3xhp-ENxvUAfQ/UserByScreenName?variables=%7B%22screen_name%22%3A%22"+username+"%22%2C%22withHighlightedLabel%22%3Atrue%7D", nil)
if err != nil {
return Profile{}, err
}
req, err := s.newRequest("GET", "https://twitter.com/i/api/2/timeline/profile/"+userID+".json")
err = s.RequestAPI(req, &jsn)
if err != nil {
return Profile{}, err
}
q := req.URL.Query()
q.Add("count", "20")
q.Add("userId", userID)
req.URL.RawQuery = q.Encode()
var timeline timeline
err = s.RequestAPI(req, &timeline)
if err != nil {
return Profile{}, err
if len(jsn.Errors) > 0 {
return Profile{}, fmt.Errorf("%s", jsn.Errors[0].Message)
}
user, found := timeline.GlobalObjects.Users[userID]
if !found {
if jsn.Data.User.RestID == "" {
return Profile{}, fmt.Errorf("rest_id not found")
}
if jsn.Data.User.Legacy.Name == "" {
return Profile{}, fmt.Errorf("either @%s does not exist or is private", username)
}
user := jsn.Data.User.Legacy
profile := Profile{
Avatar: user.ProfileImageURLHTTPS,
Banner: user.ProfileBannerURL,
@ -73,7 +73,7 @@ func (s *Scraper) GetProfile(username string) (Profile, error) {
PinnedTweetIDs: user.PinnedTweetIdsStr,
TweetsCount: user.StatusesCount,
URL: "https://twitter.com/" + user.ScreenName,
UserID: user.IDStr,
UserID: jsn.Data.User.RestID,
Username: user.ScreenName,
}

View file

@ -58,3 +58,77 @@ func TestGetProfile(t *testing.T) {
t.Error("Expected TweetsCount is greater than zero")
}
}
// TestGetProfilePrivate fetches a protected (private) account and verifies
// that its profile is still returned and matches the expected sample.
//
// The counters (followers, following, likes, tweets, ...) change over time,
// so they are excluded from the exact comparison and only checked for being
// non-zero.
//
// NOTE(review): this test talks to the live service and depends on a
// third-party account staying private and unchanged; expect it to need
// maintenance.
func TestGetProfilePrivate(t *testing.T) {
	loc := time.FixedZone("UTC", 0)
	joined := time.Date(2009, 8, 12, 6, 18, 29, 0, loc)
	sample := Profile{
		Avatar:    "https://pbs.twimg.com/profile_images/1352282054256324610/_v3nslbW_normal.jpg",
		Banner:    "https://pbs.twimg.com/profile_banners/64958707/1551520603",
		Biography: "",
		//	Birthday:   "March 21",
		IsPrivate:      true,
		IsVerified:     false,
		Joined:         &joined,
		Location:       "",
		Name:           "saidah.jpg",
		PinnedTweetIDs: []string{},
		URL:            "https://twitter.com/sdhftrh",
		UserID:         "64958707",
		Username:       "sdhftrh",
		Website:        "https://youtu.be/0liuo2Q4bGo",
	}

	// An arbitrary private profile (found via a web search).
	profile, err := GetProfile("sdhftrh")
	if err != nil {
		// Stop here: comparing a zero-value Profile against the sample would
		// only bury the real failure under a misleading diff and bogus
		// "greater than zero" errors.
		t.Fatal(err)
	}

	// Volatile counters are ignored in the structural comparison.
	ignoreCounters := cmpopts.IgnoreFields(Profile{},
		"FollowersCount",
		"FollowingCount",
		"FriendsCount",
		"LikesCount",
		"ListedCount",
		"TweetsCount",
	)
	if diff := cmp.Diff(sample, profile, ignoreCounters); diff != "" {
		t.Error("Resulting profile does not match the sample", diff)
	}

	if profile.FollowersCount == 0 {
		t.Error("Expected FollowersCount is greater than zero")
	}
	if profile.FollowingCount == 0 {
		t.Error("Expected FollowingCount is greater than zero")
	}
	if profile.LikesCount == 0 {
		t.Error("Expected LikesCount is greater than zero")
	}
	if profile.TweetsCount == 0 {
		t.Error("Expected TweetsCount is greater than zero")
	}
}
// TestGetProfileErrorSuspended checks that requesting a suspended account
// yields the error message reported by the API.
func TestGetProfileErrorSuspended(t *testing.T) {
	_, err := GetProfile("123")
	if err == nil {
		t.Error("Expected Error, got success")
		return
	}
	if msg := err.Error(); msg != "Authorization: User has been suspended. (63)" {
		t.Errorf("Expected error 'Authorization: User has been suspended. (63)', got '%s'", err)
	}
}
// TestGetProfileErrorNotFound checks that requesting a non-existent account
// yields the "Not found" error message.
func TestGetProfileErrorNotFound(t *testing.T) {
	_, err := GetProfile("sample3123131")
	switch {
	case err == nil:
		t.Error("Expected Error, got success")
	case err.Error() != "Not found":
		t.Errorf("Expected error 'Not found', got '%s'", err)
	}
}

View file

@ -39,6 +39,33 @@ type (
Error error
}
// User holds the user fields decoded from API JSON responses. The unexported
// user response type decodes it from the "legacy" object, and GetProfile
// copies its fields into a Profile.
User struct {
CreatedAt string `json:"created_at"`
Description string `json:"description"`
// Entities.URL.Urls carries the expanded form of the profile's URL entries.
// NOTE(review): presumably the source of Profile.Website — confirm in GetProfile.
Entities struct {
URL struct {
Urls []struct {
ExpandedURL string `json:"expanded_url"`
} `json:"urls"`
} `json:"url"`
} `json:"entities"`
FavouritesCount int `json:"favourites_count"`
FollowersCount int `json:"followers_count"`
FriendsCount int `json:"friends_count"`
IDStr string `json:"id_str"`
ListedCount int `json:"listed_count"`
// Name: GetProfile treats an empty Name as "does not exist or is private".
Name string `json:"name"`
Location string `json:"location"`
// PinnedTweetIdsStr is copied to Profile.PinnedTweetIDs.
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"`
// ProfileBannerURL is copied to Profile.Banner.
ProfileBannerURL string `json:"profile_banner_url"`
// ProfileImageURLHTTPS is copied to Profile.Avatar.
ProfileImageURLHTTPS string `json:"profile_image_url_https"`
Protected bool `json:"protected"`
// ScreenName becomes Profile.Username and the suffix of Profile.URL.
ScreenName string `json:"screen_name"`
// StatusesCount is copied to Profile.TweetsCount.
StatusesCount int `json:"statuses_count"`
Verified bool `json:"verified"`
}
// timeline JSON
timeline struct {
GlobalObjects struct {