twitter-scrapper/search.go

196 lines
7.2 KiB
Go
Raw Normal View History

package twitterscraper
import (
"context"
"errors"
2023-07-03 14:38:46 +03:00
"net/url"
"strconv"
)
// searchURL is the GraphQL SearchTimeline endpoint that getSearchTimeline
// sends its GET request to.
// NOTE(review): the path segment before "SearchTimeline" looks like a query
// identifier assigned by the remote API — confirm it is still current.
const searchURL = "https://x.com/i/api/graphql/nK1dw4oV3k4w5TdtcAdSww/SearchTimeline"
2023-07-03 14:38:46 +03:00
// searchTimeline is the value getSearchTimeline hands to Scraper.RequestAPI
// to be filled with the search response. The nesting follows the JSON keys
// named in the struct tags.
type searchTimeline struct {
Data struct {
SearchByRawQuery struct {
SearchTimeline struct {
Timeline struct {
// Instructions is the list walked by parseTweets and parseUsers.
Instructions []struct {
// Type is the instruction kind; the parsers only act on
// "TimelineAddEntries" and "TimelineReplaceEntry".
Type string `json:"type"`
// Entries holds the items scanned for tweets, users and cursors.
Entries []entry `json:"entries"`
// Entry is a single entry; the parsers read it to pick up a
// "Bottom" cursor.
Entry entry `json:"entry,omitempty"`
} `json:"instructions"`
} `json:"timeline"`
} `json:"search_timeline"`
} `json:"search_by_raw_query"`
} `json:"data"`
}
func (timeline *searchTimeline) parseTweets() ([]*Tweet, string) {
tweets := make([]*Tweet, 0)
cursor := ""
for _, instruction := range timeline.Data.SearchByRawQuery.SearchTimeline.Timeline.Instructions {
if instruction.Type == "TimelineAddEntries" || instruction.Type == "TimelineReplaceEntry" {
if instruction.Entry.Content.CursorType == "Bottom" {
cursor = instruction.Entry.Content.Value
continue
}
for _, entry := range instruction.Entries {
if entry.Content.ItemContent.TweetDisplayType == "Tweet" {
2024-04-26 00:29:36 +03:00
var legacy *legacyTweet = &entry.Content.ItemContent.TweetResults.Result.Legacy
var user *legacyUser = &entry.Content.ItemContent.TweetResults.Result.Core.UserResults.Result.Legacy
if entry.Content.ItemContent.TweetResults.Result.Typename == "TweetWithVisibilityResults" {
legacy = &entry.Content.ItemContent.TweetResults.Result.Tweet.Legacy
user = &entry.Content.ItemContent.TweetResults.Result.Tweet.Core.UserResults.Result.Legacy
}
if tweet := parseLegacyTweet(user, legacy); tweet != nil {
var views = entry.Content.ItemContent.TweetResults.Result.Views.Count
if entry.Content.ItemContent.TweetResults.Result.Typename == "TweetWithVisibilityResults" {
views = entry.Content.ItemContent.TweetResults.Result.Tweet.Views.Count
}
if tweet.Views == 0 && views != "" {
tweet.Views, _ = strconv.Atoi(views)
2023-07-03 14:38:46 +03:00
}
tweets = append(tweets, tweet)
}
} else if entry.Content.CursorType == "Bottom" {
cursor = entry.Content.Value
}
}
}
}
return tweets, cursor
}
func (timeline *searchTimeline) parseUsers() ([]*Profile, string) {
profiles := make([]*Profile, 0)
cursor := ""
for _, instruction := range timeline.Data.SearchByRawQuery.SearchTimeline.Timeline.Instructions {
if instruction.Type == "TimelineAddEntries" || instruction.Type == "TimelineReplaceEntry" {
if instruction.Entry.Content.CursorType == "Bottom" {
cursor = instruction.Entry.Content.Value
continue
}
for _, entry := range instruction.Entries {
if entry.Content.ItemContent.UserDisplayType == "User" {
2024-02-03 14:53:11 +01:00
if profile := parseProfileV2(entry.Content.ItemContent.UserResults.Result); profile.Name != "" {
2023-07-03 14:38:46 +03:00
if profile.UserID == "" {
profile.UserID = entry.Content.ItemContent.UserResults.Result.RestID
}
profiles = append(profiles, &profile)
}
} else if entry.Content.CursorType == "Bottom" {
cursor = entry.Content.Value
}
}
}
}
return profiles, cursor
}
2023-05-30 17:31:00 +03:00
// SearchTweets returns channel with tweets for a given search query
func (s *Scraper) SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *TweetResult {
return getTweetTimeline(ctx, query, maxTweetsNbr, s.FetchSearchTweets)
2020-12-12 23:33:57 +02:00
}
// SearchProfiles returns a channel of profile results for the given search
// query. It passes ctx, query and maxProfilesNbr to getUserTimeline, with
// s.FetchSearchProfiles as the fetch function.
func (s *Scraper) SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-chan *ProfileResult {
	fetch := s.FetchSearchProfiles
	return getUserTimeline(ctx, query, maxProfilesNbr, fetch)
}
// getSearchTimeline gets results for a given search query, via the Twitter frontend API
2023-07-03 14:38:46 +03:00
func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*searchTimeline, error) {
if !s.isLogged {
return nil, errors.New("scraper is not logged in for search")
}
if maxNbr > 50 {
maxNbr = 50
2020-12-03 21:42:16 +07:00
}
2023-05-30 17:31:00 +03:00
req, err := s.newRequest("GET", searchURL)
if err != nil {
return nil, err
}
2020-06-15 15:26:43 +03:00
2023-07-03 14:38:46 +03:00
variables := map[string]interface{}{
"rawQuery": query,
"count": maxNbr,
"querySource": "typed_query",
"product": "Top",
}
features := map[string]interface{}{
"rweb_lists_timeline_redesign_enabled": true,
"responsive_web_graphql_exclude_directive_enabled": true,
"verified_phone_label_enabled": false,
"creator_subscriptions_tweet_preview_api_enabled": true,
"responsive_web_graphql_timeline_navigation_enabled": true,
"responsive_web_graphql_skip_user_profile_image_extensions_enabled": false,
"tweetypie_unmention_optimization_enabled": true,
"responsive_web_edit_tweet_api_enabled": true,
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": true,
"view_counts_everywhere_api_enabled": true,
"longform_notetweets_consumption_enabled": true,
"responsive_web_twitter_article_tweet_consumption_enabled": false,
"tweet_awards_web_tipping_enabled": false,
"freedom_of_speech_not_reach_fetch_enabled": true,
"standardized_nudges_misinfo": true,
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": true,
"longform_notetweets_rich_text_read_enabled": true,
"longform_notetweets_inline_media_enabled": true,
"responsive_web_media_download_video_enabled": false,
"responsive_web_enhance_cards_enabled": false,
}
fieldToggles := map[string]interface{}{
"withArticleRichContentState": false,
}
if cursor != "" {
2023-07-03 14:38:46 +03:00
variables["cursor"] = cursor
}
2020-12-23 19:53:48 +02:00
switch s.searchMode {
case SearchLatest:
2023-07-03 14:38:46 +03:00
variables["product"] = "Latest"
2020-12-23 19:53:48 +02:00
case SearchPhotos:
2023-07-03 14:38:46 +03:00
variables["product"] = "Photos"
2020-12-23 19:53:48 +02:00
case SearchVideos:
2023-07-03 14:38:46 +03:00
variables["product"] = "Videos"
case SearchUsers:
2023-07-03 14:38:46 +03:00
variables["product"] = "People"
}
2023-07-03 14:38:46 +03:00
q := url.Values{}
q.Set("variables", mapToJSONString(variables))
q.Set("features", mapToJSONString(features))
q.Set("fieldToggles", mapToJSONString(fieldToggles))
req.URL.RawQuery = q.Encode()
2023-07-03 14:38:46 +03:00
var timeline searchTimeline
2020-12-12 23:33:57 +02:00
err = s.RequestAPI(req, &timeline)
if err != nil {
return nil, err
}
return &timeline, nil
}
// FetchSearchTweets gets tweets for a given search query, via the Twitter frontend API
func (s *Scraper) FetchSearchTweets(query string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) {
timeline, err := s.getSearchTimeline(query, maxTweetsNbr, cursor)
if err != nil {
return nil, "", err
}
2021-07-16 11:08:43 +03:00
tweets, nextCursor := timeline.parseTweets()
return tweets, nextCursor, nil
}
// FetchSearchProfiles gets users for a given search query, via the Twitter frontend API
func (s *Scraper) FetchSearchProfiles(query string, maxProfilesNbr int, cursor string) ([]*Profile, string, error) {
timeline, err := s.getSearchTimeline(query, maxProfilesNbr, cursor)
if err != nil {
return nil, "", err
}
2021-07-16 11:08:43 +03:00
users, nextCursor := timeline.parseUsers()
return users, nextCursor, nil
}