2020-05-13 17:35:44 +02:00
|
|
|
package twitterscraper
|
2020-05-14 14:59:33 +02:00
|
|
|
|
|
|
|
|
import (
|
2020-06-12 21:31:08 +08:00
|
|
|
"context"
|
2023-04-23 17:32:28 +03:00
|
|
|
"errors"
|
2023-07-03 14:38:46 +03:00
|
|
|
"net/url"
|
2020-12-11 20:58:49 +02:00
|
|
|
"strconv"
|
2020-05-14 14:59:33 +02:00
|
|
|
)
|
|
|
|
|
|
2023-07-03 14:38:46 +03:00
|
|
|
// searchURL is the GraphQL SearchTimeline endpoint requested by getSearchTimeline.
// NOTE(review): the path segment before "SearchTimeline" looks like an
// operation/query ID that may change upstream — confirm if requests start failing.
const searchURL = "https://twitter.com/i/api/graphql/nK1dw4oV3k4w5TdtcAdSww/SearchTimeline"
|
|
|
|
|
|
|
|
|
|
type searchTimeline struct {
|
|
|
|
|
Data struct {
|
|
|
|
|
SearchByRawQuery struct {
|
|
|
|
|
SearchTimeline struct {
|
|
|
|
|
Timeline struct {
|
|
|
|
|
Instructions []struct {
|
|
|
|
|
Type string `json:"type"`
|
|
|
|
|
Entries []entry `json:"entries"`
|
|
|
|
|
Entry entry `json:"entry,omitempty"`
|
|
|
|
|
} `json:"instructions"`
|
|
|
|
|
} `json:"timeline"`
|
|
|
|
|
} `json:"search_timeline"`
|
|
|
|
|
} `json:"search_by_raw_query"`
|
|
|
|
|
} `json:"data"`
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (timeline *searchTimeline) parseTweets() ([]*Tweet, string) {
|
|
|
|
|
tweets := make([]*Tweet, 0)
|
|
|
|
|
cursor := ""
|
|
|
|
|
for _, instruction := range timeline.Data.SearchByRawQuery.SearchTimeline.Timeline.Instructions {
|
|
|
|
|
if instruction.Type == "TimelineAddEntries" || instruction.Type == "TimelineReplaceEntry" {
|
|
|
|
|
if instruction.Entry.Content.CursorType == "Bottom" {
|
|
|
|
|
cursor = instruction.Entry.Content.Value
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
for _, entry := range instruction.Entries {
|
|
|
|
|
if entry.Content.ItemContent.TweetDisplayType == "Tweet" {
|
2024-04-26 00:29:36 +03:00
|
|
|
var legacy *legacyTweet = &entry.Content.ItemContent.TweetResults.Result.Legacy
|
|
|
|
|
var user *legacyUser = &entry.Content.ItemContent.TweetResults.Result.Core.UserResults.Result.Legacy
|
|
|
|
|
if entry.Content.ItemContent.TweetResults.Result.Typename == "TweetWithVisibilityResults" {
|
|
|
|
|
legacy = &entry.Content.ItemContent.TweetResults.Result.Tweet.Legacy
|
|
|
|
|
user = &entry.Content.ItemContent.TweetResults.Result.Tweet.Core.UserResults.Result.Legacy
|
|
|
|
|
}
|
|
|
|
|
if tweet := parseLegacyTweet(user, legacy); tweet != nil {
|
|
|
|
|
var views = entry.Content.ItemContent.TweetResults.Result.Views.Count
|
|
|
|
|
if entry.Content.ItemContent.TweetResults.Result.Typename == "TweetWithVisibilityResults" {
|
|
|
|
|
views = entry.Content.ItemContent.TweetResults.Result.Tweet.Views.Count
|
|
|
|
|
}
|
|
|
|
|
if tweet.Views == 0 && views != "" {
|
|
|
|
|
tweet.Views, _ = strconv.Atoi(views)
|
2023-07-03 14:38:46 +03:00
|
|
|
}
|
|
|
|
|
tweets = append(tweets, tweet)
|
|
|
|
|
}
|
|
|
|
|
} else if entry.Content.CursorType == "Bottom" {
|
|
|
|
|
cursor = entry.Content.Value
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return tweets, cursor
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (timeline *searchTimeline) parseUsers() ([]*Profile, string) {
|
|
|
|
|
profiles := make([]*Profile, 0)
|
|
|
|
|
cursor := ""
|
|
|
|
|
for _, instruction := range timeline.Data.SearchByRawQuery.SearchTimeline.Timeline.Instructions {
|
|
|
|
|
if instruction.Type == "TimelineAddEntries" || instruction.Type == "TimelineReplaceEntry" {
|
|
|
|
|
if instruction.Entry.Content.CursorType == "Bottom" {
|
|
|
|
|
cursor = instruction.Entry.Content.Value
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
for _, entry := range instruction.Entries {
|
|
|
|
|
if entry.Content.ItemContent.UserDisplayType == "User" {
|
2024-02-03 14:53:11 +01:00
|
|
|
if profile := parseProfileV2(entry.Content.ItemContent.UserResults.Result); profile.Name != "" {
|
2023-07-03 14:38:46 +03:00
|
|
|
if profile.UserID == "" {
|
|
|
|
|
profile.UserID = entry.Content.ItemContent.UserResults.Result.RestID
|
|
|
|
|
}
|
|
|
|
|
profiles = append(profiles, &profile)
|
|
|
|
|
}
|
|
|
|
|
} else if entry.Content.CursorType == "Bottom" {
|
|
|
|
|
cursor = entry.Content.Value
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return profiles, cursor
|
|
|
|
|
}
|
2023-05-30 17:31:00 +03:00
|
|
|
|
2020-05-15 17:52:06 +02:00
|
|
|
// SearchTweets returns channel with tweets for a given search query
|
2021-04-22 21:38:49 +03:00
|
|
|
func (s *Scraper) SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *TweetResult {
|
|
|
|
|
return getTweetTimeline(ctx, query, maxTweetsNbr, s.FetchSearchTweets)
|
2020-12-12 23:33:57 +02:00
|
|
|
}
|
|
|
|
|
|
2021-04-22 21:38:49 +03:00
|
|
|
// SearchProfiles returns channel with profiles for a given search query
|
|
|
|
|
func (s *Scraper) SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-chan *ProfileResult {
|
|
|
|
|
return getUserTimeline(ctx, query, maxProfilesNbr, s.FetchSearchProfiles)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// getSearchTimeline gets results for a given search query, via the Twitter frontend API
|
2023-07-03 14:38:46 +03:00
|
|
|
func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*searchTimeline, error) {
|
2023-04-23 17:32:28 +03:00
|
|
|
if !s.isLogged {
|
|
|
|
|
return nil, errors.New("scraper is not logged in for search")
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-22 21:38:49 +03:00
|
|
|
if maxNbr > 50 {
|
|
|
|
|
maxNbr = 50
|
2020-12-03 21:42:16 +07:00
|
|
|
}
|
|
|
|
|
|
2023-05-30 17:31:00 +03:00
|
|
|
req, err := s.newRequest("GET", searchURL)
|
2020-05-14 14:59:33 +02:00
|
|
|
if err != nil {
|
2021-04-22 21:38:49 +03:00
|
|
|
return nil, err
|
2020-05-14 14:59:33 +02:00
|
|
|
}
|
2020-06-15 15:26:43 +03:00
|
|
|
|
2023-07-03 14:38:46 +03:00
|
|
|
variables := map[string]interface{}{
|
|
|
|
|
"rawQuery": query,
|
|
|
|
|
"count": maxNbr,
|
|
|
|
|
"querySource": "typed_query",
|
|
|
|
|
"product": "Top",
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
features := map[string]interface{}{
|
|
|
|
|
"rweb_lists_timeline_redesign_enabled": true,
|
|
|
|
|
"responsive_web_graphql_exclude_directive_enabled": true,
|
|
|
|
|
"verified_phone_label_enabled": false,
|
|
|
|
|
"creator_subscriptions_tweet_preview_api_enabled": true,
|
|
|
|
|
"responsive_web_graphql_timeline_navigation_enabled": true,
|
|
|
|
|
"responsive_web_graphql_skip_user_profile_image_extensions_enabled": false,
|
|
|
|
|
"tweetypie_unmention_optimization_enabled": true,
|
|
|
|
|
"responsive_web_edit_tweet_api_enabled": true,
|
|
|
|
|
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": true,
|
|
|
|
|
"view_counts_everywhere_api_enabled": true,
|
|
|
|
|
"longform_notetweets_consumption_enabled": true,
|
|
|
|
|
"responsive_web_twitter_article_tweet_consumption_enabled": false,
|
|
|
|
|
"tweet_awards_web_tipping_enabled": false,
|
|
|
|
|
"freedom_of_speech_not_reach_fetch_enabled": true,
|
|
|
|
|
"standardized_nudges_misinfo": true,
|
|
|
|
|
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": true,
|
|
|
|
|
"longform_notetweets_rich_text_read_enabled": true,
|
|
|
|
|
"longform_notetweets_inline_media_enabled": true,
|
|
|
|
|
"responsive_web_media_download_video_enabled": false,
|
|
|
|
|
"responsive_web_enhance_cards_enabled": false,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fieldToggles := map[string]interface{}{
|
|
|
|
|
"withArticleRichContentState": false,
|
|
|
|
|
}
|
|
|
|
|
|
2020-12-11 20:58:49 +02:00
|
|
|
if cursor != "" {
|
2023-07-03 14:38:46 +03:00
|
|
|
variables["cursor"] = cursor
|
2020-05-14 14:59:33 +02:00
|
|
|
}
|
2020-12-23 19:53:48 +02:00
|
|
|
switch s.searchMode {
|
|
|
|
|
case SearchLatest:
|
2023-07-03 14:38:46 +03:00
|
|
|
variables["product"] = "Latest"
|
2020-12-23 19:53:48 +02:00
|
|
|
case SearchPhotos:
|
2023-07-03 14:38:46 +03:00
|
|
|
variables["product"] = "Photos"
|
2020-12-23 19:53:48 +02:00
|
|
|
case SearchVideos:
|
2023-07-03 14:38:46 +03:00
|
|
|
variables["product"] = "Videos"
|
2021-04-22 21:38:49 +03:00
|
|
|
case SearchUsers:
|
2023-07-03 14:38:46 +03:00
|
|
|
variables["product"] = "People"
|
2020-12-20 00:20:27 +07:00
|
|
|
}
|
|
|
|
|
|
2023-07-03 14:38:46 +03:00
|
|
|
q := url.Values{}
|
|
|
|
|
q.Set("variables", mapToJSONString(variables))
|
|
|
|
|
q.Set("features", mapToJSONString(features))
|
|
|
|
|
q.Set("fieldToggles", mapToJSONString(fieldToggles))
|
2020-12-11 20:58:49 +02:00
|
|
|
req.URL.RawQuery = q.Encode()
|
2020-05-14 14:59:33 +02:00
|
|
|
|
2023-07-03 14:38:46 +03:00
|
|
|
var timeline searchTimeline
|
2020-12-12 23:33:57 +02:00
|
|
|
err = s.RequestAPI(req, &timeline)
|
2020-05-14 14:59:33 +02:00
|
|
|
if err != nil {
|
2021-04-22 21:38:49 +03:00
|
|
|
return nil, err
|
2020-05-14 14:59:33 +02:00
|
|
|
}
|
2021-04-22 21:38:49 +03:00
|
|
|
return &timeline, nil
|
|
|
|
|
}
|
2020-05-14 14:59:33 +02:00
|
|
|
|
2021-04-22 21:38:49 +03:00
|
|
|
// FetchSearchTweets gets tweets for a given search query, via the Twitter frontend API
|
|
|
|
|
func (s *Scraper) FetchSearchTweets(query string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) {
|
|
|
|
|
timeline, err := s.getSearchTimeline(query, maxTweetsNbr, cursor)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, "", err
|
|
|
|
|
}
|
2021-07-16 11:08:43 +03:00
|
|
|
tweets, nextCursor := timeline.parseTweets()
|
2020-09-19 13:37:50 +03:00
|
|
|
return tweets, nextCursor, nil
|
2020-05-14 14:59:33 +02:00
|
|
|
}
|
2021-04-22 21:38:49 +03:00
|
|
|
|
|
|
|
|
// FetchSearchProfiles gets users for a given search query, via the Twitter frontend API
|
|
|
|
|
func (s *Scraper) FetchSearchProfiles(query string, maxProfilesNbr int, cursor string) ([]*Profile, string, error) {
|
|
|
|
|
timeline, err := s.getSearchTimeline(query, maxProfilesNbr, cursor)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, "", err
|
|
|
|
|
}
|
2021-07-16 11:08:43 +03:00
|
|
|
users, nextCursor := timeline.parseUsers()
|
2021-04-22 21:38:49 +03:00
|
|
|
return users, nextCursor, nil
|
|
|
|
|
}
|