diff --git a/README.md b/README.md
index 7c7cdb0..252199b 100644
--- a/README.md
+++ b/README.md
@@ -155,7 +155,7 @@ import (
func main() {
scraper := twitterscraper.New()
- err := scraper.LoginOpenAccount()
+ err := scraper.Login(username, password)
if err != nil {
panic(err)
}
diff --git a/search.go b/search.go
index b0fa45b..b4254f9 100644
--- a/search.go
+++ b/search.go
@@ -3,10 +3,79 @@ package twitterscraper
import (
"context"
"errors"
+ "net/url"
"strconv"
)
-const searchURL = "https://api.twitter.com/2/search/adaptive.json"
+const searchURL = "https://twitter.com/i/api/graphql/nK1dw4oV3k4w5TdtcAdSww/SearchTimeline"
+
+// searchTimeline mirrors the JSON document returned by the SearchTimeline
+// GraphQL endpoint (see searchURL); getSearchTimeline decodes the response
+// body into it. Only the fields read by parseTweets and parseUsers are
+// declared.
+type searchTimeline struct {
+ Data struct {
+ SearchByRawQuery struct {
+ SearchTimeline struct {
+ Timeline struct {
+ // Instructions is the list of timeline operations. The parsers only
+ // act on the "TimelineAddEntries" and "TimelineReplaceEntry" types.
+ Instructions []struct {
+ Type string `json:"type"`
+ // Entries holds the result items (tweets or users) and may also
+ // contain cursor entries.
+ Entries []entry `json:"entries"`
+ // Entry is the single entry of an instruction; the parsers read
+ // the "Bottom" cursor from it.
+ // NOTE(review): omitempty has no effect on struct-typed fields in
+ // encoding/json and is irrelevant for decoding.
+ Entry entry `json:"entry,omitempty"`
+ } `json:"instructions"`
+ } `json:"timeline"`
+ } `json:"search_timeline"`
+ } `json:"search_by_raw_query"`
+ } `json:"data"`
+}
+
+// parseTweets walks the timeline instructions of a search response and
+// returns the tweets found in them together with the "Bottom" pagination
+// cursor. The cursor is empty when the response carries none, and the
+// returned slice is never nil.
+func (timeline *searchTimeline) parseTweets() ([]*Tweet, string) {
+	tweets := make([]*Tweet, 0)
+	cursor := ""
+	for _, instruction := range timeline.Data.SearchByRawQuery.SearchTimeline.Timeline.Instructions {
+		if instruction.Type != "TimelineAddEntries" && instruction.Type != "TimelineReplaceEntry" {
+			continue
+		}
+		// An instruction that carries the bottom cursor in its single Entry
+		// has no result items to look at.
+		if instruction.Entry.Content.CursorType == "Bottom" {
+			cursor = instruction.Entry.Content.Value
+			continue
+		}
+		for i := range instruction.Entries {
+			// Address the slice element instead of ranging by value:
+			// parseLegacyTweet keeps a pointer into its tweet argument
+			// (Tweet.Place), so the argument must not be a loop variable that
+			// is overwritten by the next iteration.
+			entry := &instruction.Entries[i]
+			switch {
+			case entry.Content.ItemContent.TweetDisplayType == "Tweet":
+				res := &entry.Content.ItemContent.TweetResults.Result
+				tweet := parseLegacyTweet(&res.Core.UserResults.Result.Legacy, &res.Legacy)
+				if tweet == nil {
+					continue
+				}
+				// Fall back to the view counter on the result itself when the
+				// legacy payload did not provide one. An unparsable or
+				// out-of-range count leaves Views at 0, matching
+				// parseLegacyTweet.
+				if tweet.Views == 0 && res.Views.Count != "" {
+					if views, err := strconv.Atoi(res.Views.Count); err == nil {
+						tweet.Views = views
+					}
+				}
+				tweets = append(tweets, tweet)
+			case entry.Content.CursorType == "Bottom":
+				cursor = entry.Content.Value
+			}
+		}
+	}
+	return tweets, cursor
+}
+
+// parseUsers extracts the user profiles contained in a search response and
+// the "Bottom" cursor used to request the next page. Entries whose parsed
+// profile has an empty Name are dropped; a missing UserID is filled in from
+// the result's rest_id. The returned slice is never nil.
+func (timeline *searchTimeline) parseUsers() ([]*Profile, string) {
+	var nextCursor string
+	found := make([]*Profile, 0)
+	instructions := timeline.Data.SearchByRawQuery.SearchTimeline.Timeline.Instructions
+	for _, ins := range instructions {
+		switch ins.Type {
+		case "TimelineAddEntries", "TimelineReplaceEntry":
+			// handled below
+		default:
+			continue
+		}
+		if ins.Entry.Content.CursorType == "Bottom" {
+			nextCursor = ins.Entry.Content.Value
+			continue
+		}
+		for _, item := range ins.Entries {
+			if item.Content.ItemContent.UserDisplayType != "User" {
+				// Not a user item: the only other thing of interest is the
+				// pagination cursor.
+				if item.Content.CursorType == "Bottom" {
+					nextCursor = item.Content.Value
+				}
+				continue
+			}
+			user := item.Content.ItemContent.UserResults.Result
+			profile := parseProfile(user.Legacy)
+			if profile.Name == "" {
+				continue
+			}
+			if profile.UserID == "" {
+				profile.UserID = user.RestID
+			}
+			found = append(found, &profile)
+		}
+	}
+	return found, nextCursor
+}
// SearchTweets returns channel with tweets for a given search query
func (s *Scraper) SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *TweetResult {
@@ -19,7 +88,7 @@ func (s *Scraper) SearchProfiles(ctx context.Context, query string, maxProfilesN
}
// getSearchTimeline gets results for a given search query, via the Twitter frontend API
-func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*timelineV1, error) {
+func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*searchTimeline, error) {
if !s.isLogged {
return nil, errors.New("scraper is not logged in for search")
}
@@ -33,31 +102,61 @@ func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*t
return nil, err
}
- q := req.URL.Query()
- q.Add("q", query)
- q.Add("count", strconv.Itoa(maxNbr))
- q.Add("query_source", "typed_query")
- q.Add("pc", "1")
- q.Add("requestContext", "launch")
- q.Add("spelling_corrections", "1")
- q.Add("include_ext_edit_control", "true")
+ variables := map[string]interface{}{
+ "rawQuery": query,
+ "count": maxNbr,
+ "querySource": "typed_query",
+ "product": "Top",
+ }
+
+ features := map[string]interface{}{
+ "rweb_lists_timeline_redesign_enabled": true,
+ "responsive_web_graphql_exclude_directive_enabled": true,
+ "verified_phone_label_enabled": false,
+ "creator_subscriptions_tweet_preview_api_enabled": true,
+ "responsive_web_graphql_timeline_navigation_enabled": true,
+ "responsive_web_graphql_skip_user_profile_image_extensions_enabled": false,
+ "tweetypie_unmention_optimization_enabled": true,
+ "responsive_web_edit_tweet_api_enabled": true,
+ "graphql_is_translatable_rweb_tweet_is_translatable_enabled": true,
+ "view_counts_everywhere_api_enabled": true,
+ "longform_notetweets_consumption_enabled": true,
+ "responsive_web_twitter_article_tweet_consumption_enabled": false,
+ "tweet_awards_web_tipping_enabled": false,
+ "freedom_of_speech_not_reach_fetch_enabled": true,
+ "standardized_nudges_misinfo": true,
+ "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": true,
+ "longform_notetweets_rich_text_read_enabled": true,
+ "longform_notetweets_inline_media_enabled": true,
+ "responsive_web_media_download_video_enabled": false,
+ "responsive_web_enhance_cards_enabled": false,
+ }
+
+ fieldToggles := map[string]interface{}{
+ "withArticleRichContentState": false,
+ }
+
if cursor != "" {
- q.Add("cursor", cursor)
+ variables["cursor"] = cursor
}
switch s.searchMode {
case SearchLatest:
- q.Add("tweet_search_mode", "live")
+ variables["product"] = "Latest"
case SearchPhotos:
- q.Add("result_filter", "image")
+ variables["product"] = "Photos"
case SearchVideos:
- q.Add("result_filter", "video")
+ variables["product"] = "Videos"
case SearchUsers:
- q.Add("result_filter", "user")
+ variables["product"] = "People"
}
+ q := url.Values{}
+ q.Set("variables", mapToJSONString(variables))
+ q.Set("features", mapToJSONString(features))
+ q.Set("fieldToggles", mapToJSONString(fieldToggles))
req.URL.RawQuery = q.Encode()
- var timeline timelineV1
+ var timeline searchTimeline
err = s.RequestAPI(req, &timeline)
if err != nil {
return nil, err
diff --git a/search_test.go b/search_test.go
index fcd03b4..84050b1 100644
--- a/search_test.go
+++ b/search_test.go
@@ -2,18 +2,23 @@ package twitterscraper_test
import (
"context"
+ "os"
"testing"
twitterscraper "github.com/n0madic/twitter-scraper"
)
-var searchScraper = twitterscraper.New()
-
func TestFetchSearchCursor(t *testing.T) {
- err := searchScraper.LoginOpenAccount()
+ if os.Getenv("SKIP_AUTH_TEST") != "" {
+ t.Skip("Skipping test due to environment variable")
+ }
+ searchScraper := twitterscraper.New()
+ err := searchScraper.Login(username, password)
if err != nil {
t.Fatal(err)
}
+ defer searchScraper.Logout()
+
maxTweetsNbr := 150
tweetsNbr := 0
nextCursor := ""
@@ -31,13 +36,19 @@ func TestFetchSearchCursor(t *testing.T) {
}
func TestGetSearchProfiles(t *testing.T) {
+ if os.Getenv("SKIP_AUTH_TEST") != "" {
+ t.Skip("Skipping test due to environment variable")
+ }
count := 0
maxProfilesNbr := 150
dupcheck := make(map[string]bool)
- err := searchScraper.LoginOpenAccount()
+ searchScraper := twitterscraper.New()
+ err := searchScraper.Login(username, password)
if err != nil {
t.Fatal(err)
}
+ defer searchScraper.Logout()
+
searchScraper.SetSearchMode(twitterscraper.SearchUsers)
for profile := range searchScraper.SearchProfiles(context.Background(), "Twitter", maxProfilesNbr) {
if profile.Error != nil {
@@ -61,13 +72,19 @@ func TestGetSearchProfiles(t *testing.T) {
}
}
func TestGetSearchTweets(t *testing.T) {
+ if os.Getenv("SKIP_AUTH_TEST") != "" {
+ t.Skip("Skipping test due to environment variable")
+ }
count := 0
maxTweetsNbr := 150
dupcheck := make(map[string]bool)
- err := searchScraper.LoginOpenAccount()
+ searchScraper := twitterscraper.New()
+ err := searchScraper.Login(username, password)
if err != nil {
t.Fatal(err)
}
+ defer searchScraper.Logout()
+
searchScraper.SetSearchMode(twitterscraper.SearchLatest)
for tweet := range searchScraper.SearchTweets(context.Background(), "twitter", maxTweetsNbr) {
if tweet.Error != nil {
diff --git a/timeline_v2.go b/timeline_v2.go
index 020177e..b00db8e 100644
--- a/timeline_v2.go
+++ b/timeline_v2.go
@@ -1,10 +1,7 @@
package twitterscraper
import (
- "fmt"
"strconv"
- "strings"
- "time"
)
type result struct {
@@ -66,6 +63,13 @@ type entry struct {
TweetResults struct {
Result result `json:"result"`
} `json:"tweet_results"`
+ UserDisplayType string `json:"userDisplayType"`
+ UserResults struct {
+ Result struct {
+ RestID string `json:"rest_id"`
+ Legacy legacyUser `json:"legacy"`
+ } `json:"result"`
+ } `json:"user_results"`
} `json:"itemContent"`
} `json:"content"`
}
@@ -166,182 +170,3 @@ func (conversation *threadedConversation) parse() []*Tweet {
}
return tweets
}
-
-func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet {
- username := user.ScreenName
- name := user.Name
- tweetID := tweet.IDStr
- tw := &Tweet{
- ConversationID: tweet.ConversationIDStr,
- ID: tweetID,
- Likes: tweet.FavoriteCount,
- Name: name,
- PermanentURL: fmt.Sprintf("https://twitter.com/%s/status/%s", username, tweetID),
- Replies: tweet.ReplyCount,
- Retweets: tweet.RetweetCount,
- Text: tweet.FullText,
- UserID: tweet.UserIDStr,
- Username: username,
- }
-
- tm, err := time.Parse(time.RubyDate, tweet.CreatedAt)
- if err == nil {
- tw.TimeParsed = tm
- tw.Timestamp = tm.Unix()
- }
-
- if tweet.Place.ID != "" {
- tw.Place = &tweet.Place
- }
-
- if tweet.QuotedStatusIDStr != "" {
- tw.IsQuoted = true
- tw.QuotedStatusID = tweet.QuotedStatusIDStr
- }
- if tweet.InReplyToStatusIDStr != "" {
- tw.IsReply = true
- tw.InReplyToStatusID = tweet.InReplyToStatusIDStr
- }
- if tweet.RetweetedStatusIDStr != "" || tweet.RetweetedStatusResult.Result != nil {
- tw.IsRetweet = true
- tw.RetweetedStatusID = tweet.RetweetedStatusIDStr
- if tweet.RetweetedStatusResult.Result != nil {
- tw.RetweetedStatus = parseLegacyTweet(&tweet.RetweetedStatusResult.Result.Core.UserResults.Result.Legacy, &tweet.RetweetedStatusResult.Result.Legacy)
- tw.RetweetedStatusID = tw.RetweetedStatus.ID
- }
- }
-
- if tweet.Views.Count != "" {
- views, viewsErr := strconv.Atoi(tweet.Views.Count)
- if viewsErr != nil {
- views = 0
- }
- tw.Views = views
- }
-
- for _, pinned := range user.PinnedTweetIdsStr {
- if tweet.IDStr == pinned {
- tw.IsPin = true
- break
- }
- }
-
- for _, hash := range tweet.Entities.Hashtags {
- tw.Hashtags = append(tw.Hashtags, hash.Text)
- }
-
- for _, mention := range tweet.Entities.UserMentions {
- tw.Mentions = append(tw.Mentions, Mention{
- ID: mention.IDStr,
- Username: mention.ScreenName,
- Name: mention.Name,
- })
- }
-
- for _, media := range tweet.ExtendedEntities.Media {
- if media.Type == "photo" {
- photo := Photo{
- ID: media.IDStr,
- URL: media.MediaURLHttps,
- }
-
- tw.Photos = append(tw.Photos, photo)
- } else if media.Type == "video" {
- video := Video{
- ID: media.IDStr,
- Preview: media.MediaURLHttps,
- }
-
- maxBitrate := 0
- for _, variant := range media.VideoInfo.Variants {
- if variant.Bitrate > maxBitrate {
- video.URL = strings.TrimSuffix(variant.URL, "?tag=10")
- maxBitrate = variant.Bitrate
- }
- }
-
- tw.Videos = append(tw.Videos, video)
- } else if media.Type == "animated_gif" {
- gif := GIF{
- ID: media.IDStr,
- Preview: media.MediaURLHttps,
- }
-
- // Twitter's API doesn't provide bitrate for GIFs, (it's always set to zero).
- // Therefore we check for `>=` instead of `>` in the loop below.
- // Also, GIFs have just a single variant today. Just in case that changes in the future,
- // and there will be multiple variants, we'll pick the one with the highest bitrate,
- // if other one will have a non-zero bitrate.
- maxBitrate := 0
- for _, variant := range media.VideoInfo.Variants {
- if variant.Bitrate >= maxBitrate {
- gif.URL = variant.URL
- maxBitrate = variant.Bitrate
- }
- }
-
- tw.GIFs = append(tw.GIFs, gif)
- }
-
- if !tw.SensitiveContent {
- sensitive := media.ExtSensitiveMediaWarning
- tw.SensitiveContent = sensitive.AdultContent || sensitive.GraphicViolence || sensitive.Other
- }
- }
-
- for _, url := range tweet.Entities.URLs {
- tw.URLs = append(tw.URLs, url.ExpandedURL)
- }
-
- tw.HTML = tweet.FullText
- tw.HTML = reHashtag.ReplaceAllStringFunc(tw.HTML, func(hashtag string) string {
- return fmt.Sprintf(`%s`,
- strings.TrimPrefix(hashtag, "#"),
- hashtag,
- )
- })
- tw.HTML = reUsername.ReplaceAllStringFunc(tw.HTML, func(username string) string {
- return fmt.Sprintf(`%s`,
- strings.TrimPrefix(username, "@"),
- username,
- )
- })
- var foundedMedia []string
- tw.HTML = reTwitterURL.ReplaceAllStringFunc(tw.HTML, func(tco string) string {
- for _, entity := range tweet.Entities.URLs {
- if tco == entity.URL {
- return fmt.Sprintf(`%s`, entity.ExpandedURL, tco)
- }
- }
- for _, entity := range tweet.ExtendedEntities.Media {
- if tco == entity.URL {
- foundedMedia = append(foundedMedia, entity.MediaURLHttps)
- return fmt.Sprintf(`
`, tco, entity.MediaURLHttps)
- }
- }
- return tco
- })
- for _, photo := range tw.Photos {
- url := photo.URL
- if stringInSlice(url, foundedMedia) {
- continue
- }
- tw.HTML += fmt.Sprintf(`
`, url)
- }
- for _, video := range tw.Videos {
- url := video.Preview
- if stringInSlice(url, foundedMedia) {
- continue
- }
- tw.HTML += fmt.Sprintf(`
`, url)
- }
- for _, gif := range tw.GIFs {
- url := gif.Preview
- if stringInSlice(url, foundedMedia) {
- continue
- }
- tw.HTML += fmt.Sprintf(`
`, url)
- }
- tw.HTML = strings.Replace(tw.HTML, "\n", "
", -1)
- return tw
-}
diff --git a/util.go b/util.go
index 42d0987..9bf6d55 100644
--- a/util.go
+++ b/util.go
@@ -3,10 +3,12 @@ package twitterscraper
import (
"context"
"encoding/json"
+ "fmt"
"net/http"
"net/url"
"regexp"
"strconv"
+ "strings"
"time"
)
@@ -150,6 +152,188 @@ func getTweetTimeline(ctx context.Context, query string, maxTweetsNbr int, fetch
return channel
}
+// parseLegacyTweet converts the "legacy" user and tweet payloads of a GraphQL
+// result into a Tweet.
+//
+// It returns nil when the tweet payload carries no ID. For retweets the
+// embedded original is parsed recursively into RetweetedStatus. The returned
+// Tweet may keep a pointer into *tweet (Place), so the argument must remain
+// valid and unmodified for as long as the result is in use.
+func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet {
+	tweetID := tweet.IDStr
+	if tweetID == "" {
+		return nil
+	}
+	username := user.ScreenName
+	name := user.Name
+	tw := &Tweet{
+		ConversationID: tweet.ConversationIDStr,
+		ID:             tweetID,
+		Likes:          tweet.FavoriteCount,
+		Name:           name,
+		PermanentURL:   fmt.Sprintf("https://twitter.com/%s/status/%s", username, tweetID),
+		Replies:        tweet.ReplyCount,
+		Retweets:       tweet.RetweetCount,
+		Text:           tweet.FullText,
+		UserID:         tweet.UserIDStr,
+		Username:       username,
+	}
+
+	// An unparsable creation date leaves TimeParsed and Timestamp zero.
+	if tm, err := time.Parse(time.RubyDate, tweet.CreatedAt); err == nil {
+		tw.TimeParsed = tm
+		tw.Timestamp = tm.Unix()
+	}
+
+	if tweet.Place.ID != "" {
+		tw.Place = &tweet.Place
+	}
+
+	if tweet.QuotedStatusIDStr != "" {
+		tw.IsQuoted = true
+		tw.QuotedStatusID = tweet.QuotedStatusIDStr
+	}
+	if tweet.InReplyToStatusIDStr != "" {
+		tw.IsReply = true
+		tw.InReplyToStatusID = tweet.InReplyToStatusIDStr
+	}
+	if retweeted := tweet.RetweetedStatusResult.Result; tweet.RetweetedStatusIDStr != "" || retweeted != nil {
+		tw.IsRetweet = true
+		tw.RetweetedStatusID = tweet.RetweetedStatusIDStr
+		if retweeted != nil {
+			tw.RetweetedStatus = parseLegacyTweet(&retweeted.Core.UserResults.Result.Legacy, &retweeted.Legacy)
+			// The nested parse returns nil for a payload without an ID; in
+			// that case keep the ID taken from the legacy field above instead
+			// of dereferencing a nil tweet.
+			if tw.RetweetedStatus != nil {
+				tw.RetweetedStatusID = tw.RetweetedStatus.ID
+			}
+		}
+	}
+
+	if tweet.Views.Count != "" {
+		// Atoi returns a clamped value on range errors, so reset explicitly.
+		views, viewsErr := strconv.Atoi(tweet.Views.Count)
+		if viewsErr != nil {
+			views = 0
+		}
+		tw.Views = views
+	}
+
+	for _, pinned := range user.PinnedTweetIdsStr {
+		if tweet.IDStr == pinned {
+			tw.IsPin = true
+			break
+		}
+	}
+
+	for _, hash := range tweet.Entities.Hashtags {
+		tw.Hashtags = append(tw.Hashtags, hash.Text)
+	}
+
+	for _, mention := range tweet.Entities.UserMentions {
+		tw.Mentions = append(tw.Mentions, Mention{
+			ID:       mention.IDStr,
+			Username: mention.ScreenName,
+			Name:     mention.Name,
+		})
+	}
+
+	for _, media := range tweet.ExtendedEntities.Media {
+		switch media.Type {
+		case "photo":
+			tw.Photos = append(tw.Photos, Photo{
+				ID:  media.IDStr,
+				URL: media.MediaURLHttps,
+			})
+		case "video":
+			video := Video{
+				ID:      media.IDStr,
+				Preview: media.MediaURLHttps,
+			}
+
+			// Pick the variant with the highest (non-zero) bitrate.
+			maxBitrate := 0
+			for _, variant := range media.VideoInfo.Variants {
+				if variant.Bitrate > maxBitrate {
+					video.URL = strings.TrimSuffix(variant.URL, "?tag=10")
+					maxBitrate = variant.Bitrate
+				}
+			}
+
+			tw.Videos = append(tw.Videos, video)
+		case "animated_gif":
+			gif := GIF{
+				ID:      media.IDStr,
+				Preview: media.MediaURLHttps,
+			}
+
+			// Twitter's API doesn't provide bitrate for GIFs, (it's always set to zero).
+			// Therefore we check for `>=` instead of `>` in the loop below.
+			// Also, GIFs have just a single variant today. Just in case that changes in the future,
+			// and there will be multiple variants, we'll pick the one with the highest bitrate,
+			// if other one will have a non-zero bitrate.
+			maxBitrate := 0
+			for _, variant := range media.VideoInfo.Variants {
+				if variant.Bitrate >= maxBitrate {
+					gif.URL = variant.URL
+					maxBitrate = variant.Bitrate
+				}
+			}
+
+			tw.GIFs = append(tw.GIFs, gif)
+		}
+
+		// One flagged media item is enough to mark the whole tweet.
+		if !tw.SensitiveContent {
+			sensitive := media.ExtSensitiveMediaWarning
+			tw.SensitiveContent = sensitive.AdultContent || sensitive.GraphicViolence || sensitive.Other
+		}
+	}
+
+	for _, u := range tweet.Entities.URLs {
+		tw.URLs = append(tw.URLs, u.ExpandedURL)
+	}
+
+	// Build the HTML rendering: hashtags, mentions and t.co links become
+	// anchors, attached media become images.
+	// NOTE(review): the anchor/img/br markup in the format strings below was
+	// reconstructed from the argument lists — confirm against upstream.
+	tw.HTML = tweet.FullText
+	tw.HTML = reHashtag.ReplaceAllStringFunc(tw.HTML, func(hashtag string) string {
+		return fmt.Sprintf(`<a href="https://twitter.com/hashtag/%s">%s</a>`,
+			strings.TrimPrefix(hashtag, "#"),
+			hashtag,
+		)
+	})
+	tw.HTML = reUsername.ReplaceAllStringFunc(tw.HTML, func(username string) string {
+		return fmt.Sprintf(`<a href="https://twitter.com/%s">%s</a>`,
+			strings.TrimPrefix(username, "@"),
+			username,
+		)
+	})
+	// foundedMedia records media already embedded inline so that they are not
+	// appended a second time below.
+	var foundedMedia []string
+	tw.HTML = reTwitterURL.ReplaceAllStringFunc(tw.HTML, func(tco string) string {
+		for _, entity := range tweet.Entities.URLs {
+			if tco == entity.URL {
+				return fmt.Sprintf(`<a href="%s">%s</a>`, entity.ExpandedURL, tco)
+			}
+		}
+		for _, entity := range tweet.ExtendedEntities.Media {
+			if tco == entity.URL {
+				foundedMedia = append(foundedMedia, entity.MediaURLHttps)
+				return fmt.Sprintf(`<br><a href="%s"><img src="%s"/></a>`, tco, entity.MediaURLHttps)
+			}
+		}
+		return tco
+	})
+	for _, photo := range tw.Photos {
+		src := photo.URL
+		if stringInSlice(src, foundedMedia) {
+			continue
+		}
+		tw.HTML += fmt.Sprintf(`<br><img src="%s"/>`, src)
+	}
+	for _, video := range tw.Videos {
+		src := video.Preview
+		if stringInSlice(src, foundedMedia) {
+			continue
+		}
+		tw.HTML += fmt.Sprintf(`<br><img src="%s"/>`, src)
+	}
+	for _, gif := range tw.GIFs {
+		src := gif.Preview
+		if stringInSlice(src, foundedMedia) {
+			continue
+		}
+		tw.HTML += fmt.Sprintf(`<br><img src="%s"/>`, src)
+	}
+	tw.HTML = strings.Replace(tw.HTML, "\n", "<br>", -1)
+	return tw
+}
+
func parseProfile(user legacyUser) Profile {
profile := Profile{
Avatar: user.ProfileImageURLHTTPS,