Add SearchProfiles
Minor changes and fixes
This commit is contained in:
parent
f3597d0db6
commit
5032ecd29d
9 changed files with 268 additions and 100 deletions
23
README.md
23
README.md
|
|
@ -103,6 +103,7 @@ Options:
|
||||||
* `twitterscraper.SearchLatest` - live mode
|
* `twitterscraper.SearchLatest` - live mode
|
||||||
* `twitterscraper.SearchPhotos` - image mode
|
* `twitterscraper.SearchPhotos` - image mode
|
||||||
* `twitterscraper.SearchVideos` - video mode
|
* `twitterscraper.SearchVideos` - video mode
|
||||||
|
* `twitterscraper.SearchUsers` - user mode
|
||||||
|
|
||||||
### Get profile
|
### Get profile
|
||||||
|
|
||||||
|
|
@ -124,6 +125,28 @@ func main() {
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Search profiles by query
|
||||||
|
|
||||||
|
```golang
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
twitterscraper "github.com/n0madic/twitter-scraper"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
scraper := twitterscraper.New().SetSearchMode(twitterscraper.SearchUsers)
|
||||||
|
for profile := range scraper.SearchProfiles(context.Background(), "Twitter", 50) {
|
||||||
|
if profile.Error != nil {
|
||||||
|
panic(profile.Error)
|
||||||
|
}
|
||||||
|
fmt.Println(profile.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
### Get trends
|
### Get trends
|
||||||
|
|
||||||
```golang
|
```golang
|
||||||
|
|
|
||||||
42
api.go
42
api.go
|
|
@ -11,42 +11,6 @@ import (
|
||||||
|
|
||||||
const bearerToken string = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
|
const bearerToken string = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
|
||||||
|
|
||||||
type user struct {
|
|
||||||
Data struct {
|
|
||||||
User struct {
|
|
||||||
RestID string `json:"rest_id"`
|
|
||||||
Legacy struct {
|
|
||||||
CreatedAt string `json:"created_at"`
|
|
||||||
Description string `json:"description"`
|
|
||||||
Entities struct {
|
|
||||||
URL struct {
|
|
||||||
Urls []struct {
|
|
||||||
ExpandedURL string `json:"expanded_url"`
|
|
||||||
} `json:"urls"`
|
|
||||||
} `json:"url"`
|
|
||||||
} `json:"entities"`
|
|
||||||
FavouritesCount int `json:"favourites_count"`
|
|
||||||
FollowersCount int `json:"followers_count"`
|
|
||||||
FriendsCount int `json:"friends_count"`
|
|
||||||
IDStr string `json:"id_str"`
|
|
||||||
ListedCount int `json:"listed_count"`
|
|
||||||
Name string `json:"name"`
|
|
||||||
Location string `json:"location"`
|
|
||||||
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"`
|
|
||||||
ProfileBannerURL string `json:"profile_banner_url"`
|
|
||||||
ProfileImageURLHTTPS string `json:"profile_image_url_https"`
|
|
||||||
Protected bool `json:"protected"`
|
|
||||||
ScreenName string `json:"screen_name"`
|
|
||||||
StatusesCount int `json:"statuses_count"`
|
|
||||||
Verified bool `json:"verified"`
|
|
||||||
} `json:"legacy"`
|
|
||||||
} `json:"user"`
|
|
||||||
} `json:"data"`
|
|
||||||
Errors []struct {
|
|
||||||
Message string `json:"message"`
|
|
||||||
} `json:"errors"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// Global cache for user IDs
|
// Global cache for user IDs
|
||||||
var cacheIDs sync.Map
|
var cacheIDs sync.Map
|
||||||
|
|
||||||
|
|
@ -70,7 +34,8 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {
|
||||||
|
|
||||||
// private profiles return forbidden, but also data
|
// private profiles return forbidden, but also data
|
||||||
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusForbidden {
|
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusForbidden {
|
||||||
return fmt.Errorf("response status %s", resp.Status)
|
content, _ := ioutil.ReadAll(resp.Body)
|
||||||
|
return fmt.Errorf("response status %s: %s", resp.Status, content)
|
||||||
}
|
}
|
||||||
|
|
||||||
if resp.Header.Get("X-Rate-Limit-Remaining") == "0" {
|
if resp.Header.Get("X-Rate-Limit-Remaining") == "0" {
|
||||||
|
|
@ -95,7 +60,8 @@ func (s *Scraper) GetGuestToken() error {
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
return fmt.Errorf("response status %s", resp.Status)
|
content, _ := ioutil.ReadAll(resp.Body)
|
||||||
|
return fmt.Errorf("response status %s: %s", resp.Status, content)
|
||||||
}
|
}
|
||||||
body, err := ioutil.ReadAll(resp.Body)
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
||||||
47
profile.go
47
profile.go
|
|
@ -30,6 +30,18 @@ type Profile struct {
|
||||||
Website string
|
Website string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type user struct {
|
||||||
|
Data struct {
|
||||||
|
User struct {
|
||||||
|
RestID string `json:"rest_id"`
|
||||||
|
Legacy legacyUser `json:"legacy"`
|
||||||
|
} `json:"user"`
|
||||||
|
} `json:"data"`
|
||||||
|
Errors []struct {
|
||||||
|
Message string `json:"message"`
|
||||||
|
} `json:"errors"`
|
||||||
|
}
|
||||||
|
|
||||||
// GetProfile returns the parsed user profile.
|
// GetProfile returns the parsed user profile.
|
||||||
func (s *Scraper) GetProfile(username string) (Profile, error) {
|
func (s *Scraper) GetProfile(username string) (Profile, error) {
|
||||||
var jsn user
|
var jsn user
|
||||||
|
|
@ -50,44 +62,13 @@ func (s *Scraper) GetProfile(username string) (Profile, error) {
|
||||||
if jsn.Data.User.RestID == "" {
|
if jsn.Data.User.RestID == "" {
|
||||||
return Profile{}, fmt.Errorf("rest_id not found")
|
return Profile{}, fmt.Errorf("rest_id not found")
|
||||||
}
|
}
|
||||||
|
jsn.Data.User.Legacy.IDStr = jsn.Data.User.RestID
|
||||||
|
|
||||||
if jsn.Data.User.Legacy.ScreenName == "" {
|
if jsn.Data.User.Legacy.ScreenName == "" {
|
||||||
return Profile{}, fmt.Errorf("either @%s does not exist or is private", username)
|
return Profile{}, fmt.Errorf("either @%s does not exist or is private", username)
|
||||||
}
|
}
|
||||||
|
|
||||||
user := jsn.Data.User.Legacy
|
return parseProfile(jsn.Data.User.Legacy), nil
|
||||||
|
|
||||||
profile := Profile{
|
|
||||||
Avatar: user.ProfileImageURLHTTPS,
|
|
||||||
Banner: user.ProfileBannerURL,
|
|
||||||
Biography: user.Description,
|
|
||||||
FollowersCount: user.FollowersCount,
|
|
||||||
FollowingCount: user.FavouritesCount,
|
|
||||||
FriendsCount: user.FriendsCount,
|
|
||||||
IsPrivate: user.Protected,
|
|
||||||
IsVerified: user.Verified,
|
|
||||||
LikesCount: user.FavouritesCount,
|
|
||||||
ListedCount: user.ListedCount,
|
|
||||||
Location: user.Location,
|
|
||||||
Name: user.Name,
|
|
||||||
PinnedTweetIDs: user.PinnedTweetIdsStr,
|
|
||||||
TweetsCount: user.StatusesCount,
|
|
||||||
URL: "https://twitter.com/" + user.ScreenName,
|
|
||||||
UserID: jsn.Data.User.RestID,
|
|
||||||
Username: user.ScreenName,
|
|
||||||
}
|
|
||||||
|
|
||||||
tm, err := time.Parse(time.RubyDate, user.CreatedAt)
|
|
||||||
if err == nil {
|
|
||||||
tm = tm.UTC()
|
|
||||||
profile.Joined = &tm
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(user.Entities.URL.Urls) > 0 {
|
|
||||||
profile.Website = user.Entities.URL.Urls[0].ExpandedURL
|
|
||||||
}
|
|
||||||
|
|
||||||
return profile, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetProfile wrapper for default scraper
|
// GetProfile wrapper for default scraper
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,8 @@ const (
|
||||||
SearchPhotos
|
SearchPhotos
|
||||||
// SearchVideos - video mode
|
// SearchVideos - video mode
|
||||||
SearchVideos
|
SearchVideos
|
||||||
|
// SearchUsers - user mode
|
||||||
|
SearchUsers
|
||||||
)
|
)
|
||||||
|
|
||||||
var defaultScraper *Scraper
|
var defaultScraper *Scraper
|
||||||
|
|
|
||||||
52
search.go
52
search.go
|
|
@ -7,30 +7,40 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
// SearchTweets returns channel with tweets for a given search query
|
// SearchTweets returns channel with tweets for a given search query
|
||||||
func (s *Scraper) SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *Result {
|
func (s *Scraper) SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *TweetResult {
|
||||||
return getTimeline(ctx, query, maxTweetsNbr, s.FetchSearchTweets)
|
return getTweetTimeline(ctx, query, maxTweetsNbr, s.FetchSearchTweets)
|
||||||
}
|
}
|
||||||
|
|
||||||
// SearchTweets wrapper for default Scraper
|
// SearchTweets wrapper for default Scraper
|
||||||
func SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *Result {
|
func SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *TweetResult {
|
||||||
return defaultScraper.SearchTweets(ctx, query, maxTweetsNbr)
|
return defaultScraper.SearchTweets(ctx, query, maxTweetsNbr)
|
||||||
}
|
}
|
||||||
|
|
||||||
// FetchSearchTweets gets tweets for a given search query, via the Twitter frontend API
|
// SearchProfiles returns channel with profiles for a given search query
|
||||||
func (s *Scraper) FetchSearchTweets(query string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) {
|
func (s *Scraper) SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-chan *ProfileResult {
|
||||||
|
return getUserTimeline(ctx, query, maxProfilesNbr, s.FetchSearchProfiles)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SearchProfiles wrapper for default Scraper
|
||||||
|
func SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-chan *ProfileResult {
|
||||||
|
return defaultScraper.SearchProfiles(ctx, query, maxProfilesNbr)
|
||||||
|
}
|
||||||
|
|
||||||
|
// getSearchTimeline gets results for a given search query, via the Twitter frontend API
|
||||||
|
func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*timeline, error) {
|
||||||
query = url.PathEscape(query)
|
query = url.PathEscape(query)
|
||||||
if maxTweetsNbr > 100 {
|
if maxNbr > 50 {
|
||||||
maxTweetsNbr = 100
|
maxNbr = 50
|
||||||
}
|
}
|
||||||
|
|
||||||
req, err := s.newRequest("GET", "https://twitter.com/i/api/2/search/adaptive.json")
|
req, err := s.newRequest("GET", "https://twitter.com/i/api/2/search/adaptive.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, "", err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
q := req.URL.Query()
|
q := req.URL.Query()
|
||||||
q.Add("q", query)
|
q.Add("q", query)
|
||||||
q.Add("count", strconv.Itoa(maxTweetsNbr))
|
q.Add("count", strconv.Itoa(maxNbr))
|
||||||
q.Add("query_source", "typed_query")
|
q.Add("query_source", "typed_query")
|
||||||
q.Add("pc", "1")
|
q.Add("pc", "1")
|
||||||
q.Add("spelling_corrections", "1")
|
q.Add("spelling_corrections", "1")
|
||||||
|
|
@ -44,16 +54,36 @@ func (s *Scraper) FetchSearchTweets(query string, maxTweetsNbr int, cursor strin
|
||||||
q.Add("result_filter", "image")
|
q.Add("result_filter", "image")
|
||||||
case SearchVideos:
|
case SearchVideos:
|
||||||
q.Add("result_filter", "video")
|
q.Add("result_filter", "video")
|
||||||
|
case SearchUsers:
|
||||||
|
q.Add("result_filter", "user")
|
||||||
}
|
}
|
||||||
|
|
||||||
req.URL.RawQuery = q.Encode()
|
req.URL.RawQuery = q.Encode()
|
||||||
|
|
||||||
var timeline timeline
|
var timeline timeline
|
||||||
err = s.RequestAPI(req, &timeline)
|
err = s.RequestAPI(req, &timeline)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &timeline, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FetchSearchTweets gets tweets for a given search query, via the Twitter frontend API
|
||||||
|
func (s *Scraper) FetchSearchTweets(query string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) {
|
||||||
|
timeline, err := s.getSearchTimeline(query, maxTweetsNbr, cursor)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, "", err
|
return nil, "", err
|
||||||
}
|
}
|
||||||
|
tweets, nextCursor := parseTimeline(timeline)
|
||||||
tweets, nextCursor := parseTimeline(&timeline)
|
|
||||||
return tweets, nextCursor, nil
|
return tweets, nextCursor, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FetchSearchProfiles gets users for a given search query, via the Twitter frontend API
|
||||||
|
func (s *Scraper) FetchSearchProfiles(query string, maxProfilesNbr int, cursor string) ([]*Profile, string, error) {
|
||||||
|
timeline, err := s.getSearchTimeline(query, maxProfilesNbr, cursor)
|
||||||
|
if err != nil {
|
||||||
|
return nil, "", err
|
||||||
|
}
|
||||||
|
users, nextCursor := parseUsers(timeline)
|
||||||
|
return users, nextCursor, nil
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,16 +16,42 @@ func TestFetchSearchCursor(t *testing.T) {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if cursor == "" {
|
if cursor == "" {
|
||||||
t.Fatal("Expected search cursor is not empty")
|
t.Fatal("Expected search cursor is empty")
|
||||||
}
|
}
|
||||||
tweetsNbr += len(tweets)
|
tweetsNbr += len(tweets)
|
||||||
nextCursor = cursor
|
nextCursor = cursor
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetSearchProfiles(t *testing.T) {
|
||||||
|
count := 0
|
||||||
|
maxProfilesNbr := 150
|
||||||
|
dupcheck := make(map[string]bool)
|
||||||
|
scraper := New().SetSearchMode(SearchUsers)
|
||||||
|
for profile := range scraper.SearchProfiles(context.Background(), "Twitter", maxProfilesNbr) {
|
||||||
|
if profile.Error != nil {
|
||||||
|
t.Error(profile.Error)
|
||||||
|
} else {
|
||||||
|
count++
|
||||||
|
if profile.UserID == "" {
|
||||||
|
t.Error("Expected UserID is empty")
|
||||||
|
} else {
|
||||||
|
if dupcheck[profile.UserID] {
|
||||||
|
t.Errorf("Detect duplicated UserID: %s", profile.UserID)
|
||||||
|
} else {
|
||||||
|
dupcheck[profile.UserID] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if count != maxProfilesNbr {
|
||||||
|
t.Errorf("Expected profiles count=%v, got: %v", maxProfilesNbr, count)
|
||||||
|
}
|
||||||
|
}
|
||||||
func TestGetSearchTweets(t *testing.T) {
|
func TestGetSearchTweets(t *testing.T) {
|
||||||
count := 0
|
count := 0
|
||||||
maxTweetsNbr := 250
|
maxTweetsNbr := 150
|
||||||
dupcheck := make(map[string]bool)
|
dupcheck := make(map[string]bool)
|
||||||
for tweet := range SearchTweets(context.Background(), "twitter -filter:retweets", maxTweetsNbr) {
|
for tweet := range SearchTweets(context.Background(), "twitter -filter:retweets", maxTweetsNbr) {
|
||||||
if tweet.Error != nil {
|
if tweet.Error != nil {
|
||||||
|
|
@ -33,7 +59,7 @@ func TestGetSearchTweets(t *testing.T) {
|
||||||
} else {
|
} else {
|
||||||
count++
|
count++
|
||||||
if tweet.ID == "" {
|
if tweet.ID == "" {
|
||||||
t.Error("Expected tweet ID is not empty")
|
t.Error("Expected tweet ID is empty")
|
||||||
} else {
|
} else {
|
||||||
if dupcheck[tweet.ID] {
|
if dupcheck[tweet.ID] {
|
||||||
t.Errorf("Detect duplicated tweet ID: %s", tweet.ID)
|
t.Errorf("Detect duplicated tweet ID: %s", tweet.ID)
|
||||||
|
|
@ -42,13 +68,13 @@ func TestGetSearchTweets(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if tweet.PermanentURL == "" {
|
if tweet.PermanentURL == "" {
|
||||||
t.Error("Expected tweet PermanentURL is not empty")
|
t.Error("Expected tweet PermanentURL is empty")
|
||||||
}
|
}
|
||||||
if tweet.IsRetweet {
|
if tweet.IsRetweet {
|
||||||
t.Error("Expected tweet IsRetweet is false")
|
t.Error("Expected tweet IsRetweet is false")
|
||||||
}
|
}
|
||||||
if tweet.Text == "" {
|
if tweet.Text == "" {
|
||||||
t.Error("Expected tweet Text is not empty")
|
t.Error("Expected tweet Text is empty")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,12 +7,12 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
// GetTweets returns channel with tweets for a given user.
|
// GetTweets returns channel with tweets for a given user.
|
||||||
func (s *Scraper) GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *Result {
|
func (s *Scraper) GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *TweetResult {
|
||||||
return getTimeline(ctx, user, maxTweetsNbr, s.FetchTweets)
|
return getTweetTimeline(ctx, user, maxTweetsNbr, s.FetchTweets)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetTweets wrapper for default Scraper
|
// GetTweets wrapper for default Scraper
|
||||||
func GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *Result {
|
func GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *TweetResult {
|
||||||
return defaultScraper.GetTweets(ctx, user, maxTweetsNbr)
|
return defaultScraper.GetTweets(ctx, user, maxTweetsNbr)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
42
types.go
42
types.go
|
|
@ -43,12 +43,44 @@ type (
|
||||||
Videos []Video
|
Videos []Video
|
||||||
}
|
}
|
||||||
|
|
||||||
// Result of scrapping.
|
// ProfileResult of scraping.
|
||||||
Result struct {
|
ProfileResult struct {
|
||||||
|
Profile
|
||||||
|
Error error
|
||||||
|
}
|
||||||
|
|
||||||
|
// TweetResult of scraping.
|
||||||
|
TweetResult struct {
|
||||||
Tweet
|
Tweet
|
||||||
Error error
|
Error error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
legacyUser struct {
|
||||||
|
CreatedAt string `json:"created_at"`
|
||||||
|
Description string `json:"description"`
|
||||||
|
Entities struct {
|
||||||
|
URL struct {
|
||||||
|
Urls []struct {
|
||||||
|
ExpandedURL string `json:"expanded_url"`
|
||||||
|
} `json:"urls"`
|
||||||
|
} `json:"url"`
|
||||||
|
} `json:"entities"`
|
||||||
|
FavouritesCount int `json:"favourites_count"`
|
||||||
|
FollowersCount int `json:"followers_count"`
|
||||||
|
FriendsCount int `json:"friends_count"`
|
||||||
|
IDStr string `json:"id_str"`
|
||||||
|
ListedCount int `json:"listed_count"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
Location string `json:"location"`
|
||||||
|
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"`
|
||||||
|
ProfileBannerURL string `json:"profile_banner_url"`
|
||||||
|
ProfileImageURLHTTPS string `json:"profile_image_url_https"`
|
||||||
|
Protected bool `json:"protected"`
|
||||||
|
ScreenName string `json:"screen_name"`
|
||||||
|
StatusesCount int `json:"statuses_count"`
|
||||||
|
Verified bool `json:"verified"`
|
||||||
|
}
|
||||||
|
|
||||||
// timeline JSON
|
// timeline JSON
|
||||||
timeline struct {
|
timeline struct {
|
||||||
GlobalObjects struct {
|
GlobalObjects struct {
|
||||||
|
|
@ -128,6 +160,9 @@ type (
|
||||||
Tweet struct {
|
Tweet struct {
|
||||||
ID string `json:"id"`
|
ID string `json:"id"`
|
||||||
} `json:"tweet"`
|
} `json:"tweet"`
|
||||||
|
User struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
} `json:"user"`
|
||||||
} `json:"content"`
|
} `json:"content"`
|
||||||
} `json:"item"`
|
} `json:"item"`
|
||||||
Operation struct {
|
Operation struct {
|
||||||
|
|
@ -185,5 +220,6 @@ type (
|
||||||
} `json:"timeline"`
|
} `json:"timeline"`
|
||||||
}
|
}
|
||||||
|
|
||||||
fetchFunc func(user string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error)
|
fetchProfileFunc func(query string, maxProfilesNbr int, cursor string) ([]*Profile, string, error)
|
||||||
|
fetchTweetFunc func(query string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error)
|
||||||
)
|
)
|
||||||
|
|
|
||||||
118
util.go
118
util.go
|
|
@ -51,23 +51,68 @@ func (s *Scraper) newRequest(method string, url string) (*http.Request, error) {
|
||||||
return req, nil
|
return req, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getTimeline(ctx context.Context, query string, maxTweetsNbr int, fetchFunc fetchFunc) <-chan *Result {
|
func getUserTimeline(ctx context.Context, query string, maxProfilesNbr int, fetchFunc fetchProfileFunc) <-chan *ProfileResult {
|
||||||
channel := make(chan *Result)
|
channel := make(chan *ProfileResult)
|
||||||
go func(user string) {
|
go func(query string) {
|
||||||
|
defer close(channel)
|
||||||
|
var nextCursor string
|
||||||
|
profilesNbr := 0
|
||||||
|
for profilesNbr < maxProfilesNbr {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
channel <- &ProfileResult{Error: ctx.Err()}
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
|
profiles, next, err := fetchFunc(query, maxProfilesNbr, nextCursor)
|
||||||
|
if err != nil {
|
||||||
|
channel <- &ProfileResult{Error: err}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(profiles) == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, profile := range profiles {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
channel <- &ProfileResult{Error: ctx.Err()}
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
|
if profilesNbr < maxProfilesNbr {
|
||||||
|
nextCursor = next
|
||||||
|
channel <- &ProfileResult{Profile: *profile}
|
||||||
|
} else {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
profilesNbr++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}(query)
|
||||||
|
return channel
|
||||||
|
}
|
||||||
|
|
||||||
|
func getTweetTimeline(ctx context.Context, query string, maxTweetsNbr int, fetchFunc fetchTweetFunc) <-chan *TweetResult {
|
||||||
|
channel := make(chan *TweetResult)
|
||||||
|
go func(query string) {
|
||||||
defer close(channel)
|
defer close(channel)
|
||||||
var nextCursor string
|
var nextCursor string
|
||||||
tweetsNbr := 0
|
tweetsNbr := 0
|
||||||
for tweetsNbr < maxTweetsNbr {
|
for tweetsNbr < maxTweetsNbr {
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
channel <- &Result{Error: ctx.Err()}
|
channel <- &TweetResult{Error: ctx.Err()}
|
||||||
return
|
return
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
|
|
||||||
tweets, next, err := fetchFunc(query, maxTweetsNbr, nextCursor)
|
tweets, next, err := fetchFunc(query, maxTweetsNbr, nextCursor)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
channel <- &Result{Error: err}
|
channel <- &TweetResult{Error: err}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -78,7 +123,7 @@ func getTimeline(ctx context.Context, query string, maxTweetsNbr int, fetchFunc
|
||||||
for _, tweet := range tweets {
|
for _, tweet := range tweets {
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
channel <- &Result{Error: ctx.Err()}
|
channel <- &TweetResult{Error: ctx.Err()}
|
||||||
return
|
return
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
|
|
@ -88,7 +133,7 @@ func getTimeline(ctx context.Context, query string, maxTweetsNbr int, fetchFunc
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
nextCursor = next
|
nextCursor = next
|
||||||
channel <- &Result{Tweet: *tweet}
|
channel <- &TweetResult{Tweet: *tweet}
|
||||||
} else {
|
} else {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
@ -99,6 +144,40 @@ func getTimeline(ctx context.Context, query string, maxTweetsNbr int, fetchFunc
|
||||||
return channel
|
return channel
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func parseProfile(user legacyUser) Profile {
|
||||||
|
profile := Profile{
|
||||||
|
Avatar: user.ProfileImageURLHTTPS,
|
||||||
|
Banner: user.ProfileBannerURL,
|
||||||
|
Biography: user.Description,
|
||||||
|
FollowersCount: user.FollowersCount,
|
||||||
|
FollowingCount: user.FavouritesCount,
|
||||||
|
FriendsCount: user.FriendsCount,
|
||||||
|
IsPrivate: user.Protected,
|
||||||
|
IsVerified: user.Verified,
|
||||||
|
LikesCount: user.FavouritesCount,
|
||||||
|
ListedCount: user.ListedCount,
|
||||||
|
Location: user.Location,
|
||||||
|
Name: user.Name,
|
||||||
|
PinnedTweetIDs: user.PinnedTweetIdsStr,
|
||||||
|
TweetsCount: user.StatusesCount,
|
||||||
|
URL: "https://twitter.com/" + user.ScreenName,
|
||||||
|
UserID: user.IDStr,
|
||||||
|
Username: user.ScreenName,
|
||||||
|
}
|
||||||
|
|
||||||
|
tm, err := time.Parse(time.RubyDate, user.CreatedAt)
|
||||||
|
if err == nil {
|
||||||
|
tm = tm.UTC()
|
||||||
|
profile.Joined = &tm
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(user.Entities.URL.Urls) > 0 {
|
||||||
|
profile.Website = user.Entities.URL.Urls[0].ExpandedURL
|
||||||
|
}
|
||||||
|
|
||||||
|
return profile
|
||||||
|
}
|
||||||
|
|
||||||
func parseTimeline(timeline *timeline) ([]*Tweet, string) {
|
func parseTimeline(timeline *timeline) ([]*Tweet, string) {
|
||||||
tweets := make(map[string]Tweet)
|
tweets := make(map[string]Tweet)
|
||||||
|
|
||||||
|
|
@ -234,3 +313,28 @@ func parseTimeline(timeline *timeline) ([]*Tweet, string) {
|
||||||
}
|
}
|
||||||
return orderedTweets, cursor
|
return orderedTweets, cursor
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func parseUsers(timeline *timeline) ([]*Profile, string) {
|
||||||
|
users := make(map[string]Profile)
|
||||||
|
|
||||||
|
for id, user := range timeline.GlobalObjects.Users {
|
||||||
|
users[id] = parseProfile(user)
|
||||||
|
}
|
||||||
|
|
||||||
|
var cursor string
|
||||||
|
var orderedProfiles []*Profile
|
||||||
|
for _, instruction := range timeline.Timeline.Instructions {
|
||||||
|
for _, entry := range instruction.AddEntries.Entries {
|
||||||
|
if profile, ok := users[entry.Content.Item.Content.User.ID]; ok {
|
||||||
|
orderedProfiles = append(orderedProfiles, &profile)
|
||||||
|
}
|
||||||
|
if entry.Content.Operation.Cursor.CursorType == "Bottom" {
|
||||||
|
cursor = entry.Content.Operation.Cursor.Value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if instruction.ReplaceEntry.Entry.Content.Operation.Cursor.CursorType == "Bottom" {
|
||||||
|
cursor = instruction.ReplaceEntry.Entry.Content.Operation.Cursor.Value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return orderedProfiles, cursor
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue