From dbe17fb4d0088992f81db72610127c70012e8bdb Mon Sep 17 00:00:00 2001 From: Valentine Date: Mon, 29 Jan 2024 00:07:40 +0300 Subject: [PATCH 1/2] fix socks proxy without auth --- scraper.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/scraper.go b/scraper.go index 9852eee..98326c6 100644 --- a/scraper.go +++ b/scraper.go @@ -143,7 +143,16 @@ func (s *Scraper) SetProxy(proxyAddr string) error { host := proxyURL.Hostname() port := proxyURL.Port() - dialSocksProxy, err := proxy.SOCKS5("tcp", host+":"+port, &proxy.Auth{User: username, Password: password}, baseDialer) + var auth *proxy.Auth + + if username != "" || password != "" { + auth = &proxy.Auth{ + User: username, + Password: password, + } + } + + dialSocksProxy, err := proxy.SOCKS5("tcp", host+":"+port, auth, baseDialer) if err != nil { return errors.New("error creating socks5 proxy :" + err.Error()) } From 19b6e56438e6f23d69d6f9854609bd496eb9ee14 Mon Sep 17 00:00:00 2001 From: Valentine Date: Tue, 13 Feb 2024 04:45:08 +0300 Subject: [PATCH 2/2] add user media --- README.md | 26 ++++++++++++++++ medias.go | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++ timeline_v2.go | 49 ++++++++++++++++++++--------- 3 files changed, 144 insertions(+), 15 deletions(-) create mode 100644 medias.go diff --git a/README.md b/README.md index 3ca39aa..7d97613 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,32 @@ func main() { It appears you can ask for up to 50 tweets. 
+### Get user medias + +```golang +package main + +import ( + "context" + "fmt" + twitterscraper "github.com/imperatrona/twitter-scraper" +) + +func main() { + scraper := twitterscraper.New() + account, err := scraper.LoginOpenAccount() + if err != nil { + panic(err) + } + for tweet := range scraper.GetMediaTweets(context.Background(), "Twitter", 50) { + if tweet.Error != nil { + panic(tweet.Error) + } + fmt.Println(tweet.Text) + } +} +``` + ### Get single tweet ```golang diff --git a/medias.go b/medias.go new file mode 100644 index 0000000..03ff9d3 --- /dev/null +++ b/medias.go @@ -0,0 +1,84 @@ +package twitterscraper + +import ( + "context" + "net/url" +) + +// GetMediaTweets returns a channel with media tweets for a given user. +func (s *Scraper) GetMediaTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *TweetResult { + return getTweetTimeline(ctx, user, maxTweetsNbr, s.FetchMediaTweets) +} + +// FetchMediaTweets gets tweets with medias for a given user, via the Twitter frontend API. +func (s *Scraper) FetchMediaTweets(user string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) { + userID, err := s.GetUserIDByScreenName(user) + if err != nil { + return nil, "", err + } + + return s.FetchMediaTweetsByUserID(userID, maxTweetsNbr, cursor) +} + +// FetchMediaTweetsByUserID gets tweets with medias for a given userID, via the Twitter frontend GraphQL API. 
+func (s *Scraper) FetchMediaTweetsByUserID(userID string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) { + if maxTweetsNbr > 200 { + maxTweetsNbr = 200 + } + + req, err := s.newRequest("GET", "https://twitter.com/i/api/graphql/2tLOJWwGuCTytDrGBg8VwQ/UserMedia") + if err != nil { + return nil, "", err + } + + variables := map[string]interface{}{ + "userId": userID, + "count": maxTweetsNbr, + "includePromotedContent": false, + "withClientEventToken": false, + "withBirdwatchNotes": false, + "withVoice": true, + "withV2Timeline": true, + } + features := map[string]interface{}{ + "responsive_web_graphql_exclude_directive_enabled": true, + "verified_phone_label_enabled": false, + "creator_subscriptions_tweet_preview_api_enabled": true, + "responsive_web_graphql_timeline_navigation_enabled": true, + "responsive_web_graphql_skip_user_profile_image_extensions_enabled": false, + "c9s_tweet_anatomy_moderator_badge_enabled": true, + "tweetypie_unmention_optimization_enabled": true, + "responsive_web_edit_tweet_api_enabled": true, + "graphql_is_translatable_rweb_tweet_is_translatable_enabled": true, + "view_counts_everywhere_api_enabled": true, + "longform_notetweets_consumption_enabled": true, + "responsive_web_twitter_article_tweet_consumption_enabled": true, + "tweet_awards_web_tipping_enabled": false, + "freedom_of_speech_not_reach_fetch_enabled": true, + "standardized_nudges_misinfo": true, + "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": true, + "rweb_video_timestamps_enabled": true, + "longform_notetweets_rich_text_read_enabled": true, + "longform_notetweets_inline_media_enabled": true, + "responsive_web_media_download_video_enabled": false, + "responsive_web_enhance_cards_enabled": false, + } + + if cursor != "" { + variables["cursor"] = cursor + } + + query := url.Values{} + query.Set("variables", mapToJSONString(variables)) + query.Set("features", mapToJSONString(features)) + req.URL.RawQuery = query.Encode() + + var timeline 
timelineV2 + err = s.RequestAPI(req, &timeline) + if err != nil { + return nil, "", err + } + + tweets, nextCursor := timeline.parseTweets() + return tweets, nextCursor, nil +} diff --git a/timeline_v2.go b/timeline_v2.go index e33bb9b..2ab8124 100644 --- a/timeline_v2.go +++ b/timeline_v2.go @@ -47,20 +47,22 @@ func (result *result) parse() *Tweet { return tw } +type item struct { + Item struct { + ItemContent struct { + TweetDisplayType string `json:"tweetDisplayType"` + TweetResults struct { + Result result `json:"result"` + } `json:"tweet_results"` + } `json:"itemContent"` + } `json:"item"` +} + type entry struct { Content struct { - CursorType string `json:"cursorType"` - Value string `json:"value"` - Items []struct { - Item struct { - ItemContent struct { - TweetDisplayType string `json:"tweetDisplayType"` - TweetResults struct { - Result result `json:"result"` - } `json:"tweet_results"` - } `json:"itemContent"` - } `json:"item"` - } `json:"items"` + CursorType string `json:"cursorType"` + Value string `json:"value"` + Items []item `json:"items"` ItemContent struct { TweetDisplayType string `json:"tweetDisplayType"` TweetResults struct { @@ -85,9 +87,10 @@ type timelineV2 struct { TimelineV2 struct { Timeline struct { Instructions []struct { - Entries []entry `json:"entries"` - Entry entry `json:"entry"` - Type string `json:"type"` + ModuleItems []item `json:"moduleItems"` + Entries []entry `json:"entries"` + Entry entry `json:"entry"` + Type string `json:"type"` } `json:"instructions"` } `json:"timeline"` } `json:"timeline_v2"` @@ -110,6 +113,22 @@ func (timeline *timelineV2) parseTweets() ([]*Tweet, string) { tweets = append(tweets, tweet) } } + if len(entry.Content.Items) > 0 { + for _, item := range entry.Content.Items { + if tweet := item.Item.ItemContent.TweetResults.Result.parse(); tweet != nil { + tweets = append(tweets, tweet) + } + } + } + } + if len(instruction.ModuleItems) > 0 { + for _, entry := range instruction.ModuleItems { + if 
entry.Item.ItemContent.TweetResults.Result.Typename == "Tweet" { + if tweet := entry.Item.ItemContent.TweetResults.Result.parse(); tweet != nil { + tweets = append(tweets, tweet) + } + } + } } } return tweets, cursor