Use GraphQL API with timeline v2
Close #85 Close #82 Close #77 Close #76
This commit is contained in:
parent
c07bd1d1d4
commit
50440667ed
7 changed files with 502 additions and 131 deletions
|
|
@ -19,7 +19,7 @@ func (s *Scraper) SearchProfiles(ctx context.Context, query string, maxProfilesN
|
|||
}
|
||||
|
||||
// getSearchTimeline gets results for a given search query, via the Twitter frontend API
|
||||
func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*timeline, error) {
|
||||
func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*timelineV1, error) {
|
||||
if !s.isLogged {
|
||||
return nil, errors.New("scraper is not logged in for search")
|
||||
}
|
||||
|
|
@ -57,7 +57,7 @@ func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*t
|
|||
|
||||
req.URL.RawQuery = q.Encode()
|
||||
|
||||
var timeline timeline
|
||||
var timeline timelineV1
|
||||
err = s.RequestAPI(req, &timeline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
|
|||
|
|
@ -7,90 +7,11 @@ import (
|
|||
"time"
|
||||
)
|
||||
|
||||
// timeline JSON object
|
||||
type timeline struct {
|
||||
// legacy timeline JSON object
|
||||
type timelineV1 struct {
|
||||
GlobalObjects struct {
|
||||
Tweets map[string]struct {
|
||||
ConversationIDStr string `json:"conversation_id_str"`
|
||||
CreatedAt string `json:"created_at"`
|
||||
FavoriteCount int `json:"favorite_count"`
|
||||
FullText string `json:"full_text"`
|
||||
Entities struct {
|
||||
Hashtags []struct {
|
||||
Text string `json:"text"`
|
||||
} `json:"hashtags"`
|
||||
Media []struct {
|
||||
MediaURLHttps string `json:"media_url_https"`
|
||||
Type string `json:"type"`
|
||||
URL string `json:"url"`
|
||||
} `json:"media"`
|
||||
URLs []struct {
|
||||
ExpandedURL string `json:"expanded_url"`
|
||||
URL string `json:"url"`
|
||||
} `json:"urls"`
|
||||
UserMentions []struct {
|
||||
IDStr string `json:"id_str"`
|
||||
Name string `json:"name"`
|
||||
ScreenName string `json:"screen_name"`
|
||||
} `json:"user_mentions"`
|
||||
} `json:"entities"`
|
||||
ExtendedEntities struct {
|
||||
Media []struct {
|
||||
IDStr string `json:"id_str"`
|
||||
MediaURLHttps string `json:"media_url_https"`
|
||||
ExtSensitiveMediaWarning struct {
|
||||
AdultContent bool `json:"adult_content"`
|
||||
GraphicViolence bool `json:"graphic_violence"`
|
||||
Other bool `json:"other"`
|
||||
} `json:"ext_sensitive_media_warning"`
|
||||
Type string `json:"type"`
|
||||
URL string `json:"url"`
|
||||
VideoInfo struct {
|
||||
Variants []struct {
|
||||
Bitrate int `json:"bitrate,omitempty"`
|
||||
URL string `json:"url"`
|
||||
} `json:"variants"`
|
||||
} `json:"video_info"`
|
||||
} `json:"media"`
|
||||
} `json:"extended_entities"`
|
||||
InReplyToStatusIDStr string `json:"in_reply_to_status_id_str"`
|
||||
Place Place `json:"place"`
|
||||
ReplyCount int `json:"reply_count"`
|
||||
RetweetCount int `json:"retweet_count"`
|
||||
RetweetedStatusIDStr string `json:"retweeted_status_id_str"`
|
||||
QuotedStatusIDStr string `json:"quoted_status_id_str"`
|
||||
Time time.Time `json:"time"`
|
||||
UserIDStr string `json:"user_id_str"`
|
||||
Views struct {
|
||||
State string `json:"state"`
|
||||
Count string `json:"count"`
|
||||
} `json:"ext_views"`
|
||||
} `json:"tweets"`
|
||||
Users map[string]struct {
|
||||
CreatedAt string `json:"created_at"`
|
||||
Description string `json:"description"`
|
||||
Entities struct {
|
||||
URL struct {
|
||||
Urls []struct {
|
||||
ExpandedURL string `json:"expanded_url"`
|
||||
} `json:"urls"`
|
||||
} `json:"url"`
|
||||
} `json:"entities"`
|
||||
FavouritesCount int `json:"favourites_count"`
|
||||
FollowersCount int `json:"followers_count"`
|
||||
FriendsCount int `json:"friends_count"`
|
||||
IDStr string `json:"id_str"`
|
||||
ListedCount int `json:"listed_count"`
|
||||
Name string `json:"name"`
|
||||
Location string `json:"location"`
|
||||
PinnedTweetIdsStr []string `json:"pinned_tweet_ids_str"`
|
||||
ProfileBannerURL string `json:"profile_banner_url"`
|
||||
ProfileImageURLHTTPS string `json:"profile_image_url_https"`
|
||||
Protected bool `json:"protected"`
|
||||
ScreenName string `json:"screen_name"`
|
||||
StatusesCount int `json:"statuses_count"`
|
||||
Verified bool `json:"verified"`
|
||||
} `json:"users"`
|
||||
Tweets map[string]legacyTweet `json:"tweets"`
|
||||
Users map[string]legacyUser `json:"users"`
|
||||
} `json:"globalObjects"`
|
||||
Timeline struct {
|
||||
Instructions []struct {
|
||||
|
|
@ -162,7 +83,7 @@ type timeline struct {
|
|||
} `json:"timeline"`
|
||||
}
|
||||
|
||||
func (timeline *timeline) parseTweet(id string) *Tweet {
|
||||
func (timeline *timelineV1) parseTweet(id string) *Tweet {
|
||||
if tweet, ok := timeline.GlobalObjects.Tweets[id]; ok {
|
||||
username := timeline.GlobalObjects.Users[tweet.UserIDStr].ScreenName
|
||||
name := timeline.GlobalObjects.Users[tweet.UserIDStr].Name
|
||||
|
|
@ -191,14 +112,17 @@ func (timeline *timeline) parseTweet(id string) *Tweet {
|
|||
if tweet.QuotedStatusIDStr != "" {
|
||||
tw.IsQuoted = true
|
||||
tw.QuotedStatus = timeline.parseTweet(tweet.QuotedStatusIDStr)
|
||||
tw.QuotedStatusID = tweet.QuotedStatusIDStr
|
||||
}
|
||||
if tweet.InReplyToStatusIDStr != "" {
|
||||
tw.IsReply = true
|
||||
tw.InReplyToStatus = timeline.parseTweet(tweet.InReplyToStatusIDStr)
|
||||
tw.InReplyToStatusID = tweet.InReplyToStatusIDStr
|
||||
}
|
||||
if tweet.RetweetedStatusIDStr != "" {
|
||||
tw.IsRetweet = true
|
||||
tw.RetweetedStatus = timeline.parseTweet(tweet.RetweetedStatusIDStr)
|
||||
tw.RetweetedStatusID = tweet.RetweetedStatusIDStr
|
||||
}
|
||||
|
||||
if tweet.Views.Count != "" {
|
||||
|
|
@ -210,7 +134,7 @@ func (timeline *timeline) parseTweet(id string) *Tweet {
|
|||
}
|
||||
|
||||
for _, pinned := range timeline.GlobalObjects.Users[tweet.UserIDStr].PinnedTweetIdsStr {
|
||||
if tweet.ConversationIDStr == pinned {
|
||||
if tweet.IDStr == pinned {
|
||||
tw.IsPin = true
|
||||
break
|
||||
}
|
||||
|
|
@ -311,7 +235,7 @@ func (timeline *timeline) parseTweet(id string) *Tweet {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (timeline *timeline) parseTweets() ([]*Tweet, string) {
|
||||
func (timeline *timelineV1) parseTweets() ([]*Tweet, string) {
|
||||
var cursor string
|
||||
var pinnedTweet *Tweet
|
||||
var orderedTweets []*Tweet
|
||||
|
|
@ -339,7 +263,7 @@ func (timeline *timeline) parseTweets() ([]*Tweet, string) {
|
|||
return orderedTweets, cursor
|
||||
}
|
||||
|
||||
func (timeline *timeline) parseUsers() ([]*Profile, string) {
|
||||
func (timeline *timelineV1) parseUsers() ([]*Profile, string) {
|
||||
users := make(map[string]Profile)
|
||||
|
||||
for id, user := range timeline.GlobalObjects.Users {
|
||||
301
timeline_v2.go
Normal file
301
timeline_v2.go
Normal file
|
|
@ -0,0 +1,301 @@
|
|||
package twitterscraper
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
type result struct {
|
||||
Typename string `json:"__typename"`
|
||||
Core struct {
|
||||
UserResults struct {
|
||||
Result struct {
|
||||
IsBlueVerified bool `json:"is_blue_verified"`
|
||||
Legacy legacyUser `json:"legacy"`
|
||||
} `json:"result"`
|
||||
} `json:"user_results"`
|
||||
} `json:"core"`
|
||||
Views struct {
|
||||
Count string `json:"count"`
|
||||
} `json:"views"`
|
||||
NoteTweet struct {
|
||||
NoteTweetResults struct {
|
||||
Result struct {
|
||||
Text string `json:"text"`
|
||||
} `json:"result"`
|
||||
} `json:"note_tweet_results"`
|
||||
} `json:"note_tweet"`
|
||||
QuotedStatusResult struct {
|
||||
Result *result `json:"result"`
|
||||
} `json:"quoted_status_result"`
|
||||
Legacy legacyTweet `json:"legacy"`
|
||||
}
|
||||
|
||||
func (result *result) parse() *Tweet {
|
||||
if result.NoteTweet.NoteTweetResults.Result.Text != "" {
|
||||
result.Legacy.FullText = result.NoteTweet.NoteTweetResults.Result.Text
|
||||
}
|
||||
tw := parseLegacyTweet(&result.Core.UserResults.Result.Legacy, &result.Legacy)
|
||||
if tw.Views == 0 && result.Views.Count != "" {
|
||||
tw.Views, _ = strconv.Atoi(result.Views.Count)
|
||||
}
|
||||
if result.QuotedStatusResult.Result != nil {
|
||||
tw.QuotedStatus = result.QuotedStatusResult.Result.parse()
|
||||
}
|
||||
return tw
|
||||
}
|
||||
|
||||
type entry struct {
|
||||
Content struct {
|
||||
CursorType string `json:"cursorType"`
|
||||
Value string `json:"value"`
|
||||
Items []struct {
|
||||
Item struct {
|
||||
ItemContent struct {
|
||||
TweetResults struct {
|
||||
Result result `json:"result"`
|
||||
} `json:"tweet_results"`
|
||||
} `json:"itemContent"`
|
||||
} `json:"item"`
|
||||
} `json:"items"`
|
||||
ItemContent struct {
|
||||
TweetResults struct {
|
||||
Result result `json:"result"`
|
||||
} `json:"tweet_results"`
|
||||
} `json:"itemContent"`
|
||||
} `json:"content"`
|
||||
}
|
||||
|
||||
// timeline v2 JSON object
|
||||
type timelineV2 struct {
|
||||
Data struct {
|
||||
User struct {
|
||||
Result struct {
|
||||
TimelineV2 struct {
|
||||
Timeline struct {
|
||||
Instructions []struct {
|
||||
Entries []entry `json:"entries"`
|
||||
Entry entry `json:"entry"`
|
||||
Type string `json:"type"`
|
||||
} `json:"instructions"`
|
||||
} `json:"timeline"`
|
||||
} `json:"timeline_v2"`
|
||||
} `json:"result"`
|
||||
} `json:"user"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
func (timeline *timelineV2) parseTweets() ([]*Tweet, string) {
|
||||
var cursor string
|
||||
var tweets []*Tweet
|
||||
for _, instruction := range timeline.Data.User.Result.TimelineV2.Timeline.Instructions {
|
||||
for _, entry := range instruction.Entries {
|
||||
if entry.Content.CursorType == "Bottom" {
|
||||
cursor = entry.Content.Value
|
||||
continue
|
||||
}
|
||||
if entry.Content.ItemContent.TweetResults.Result.Typename == "Tweet" {
|
||||
if tweet := entry.Content.ItemContent.TweetResults.Result.parse(); tweet != nil {
|
||||
tweets = append(tweets, tweet)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return tweets, cursor
|
||||
}
|
||||
|
||||
type threadedConversation struct {
|
||||
Data struct {
|
||||
ThreadedConversationWithInjectionsV2 struct {
|
||||
Instructions []struct {
|
||||
Type string `json:"type"`
|
||||
Entries []entry `json:"entries"`
|
||||
Entry entry `json:"entry"`
|
||||
} `json:"instructions"`
|
||||
} `json:"threaded_conversation_with_injections_v2"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
func (conversation *threadedConversation) parse() []*Tweet {
|
||||
var tweets []*Tweet
|
||||
for _, instruction := range conversation.Data.ThreadedConversationWithInjectionsV2.Instructions {
|
||||
for _, entry := range instruction.Entries {
|
||||
if entry.Content.ItemContent.TweetResults.Result.Typename == "Tweet" {
|
||||
if tweet := entry.Content.ItemContent.TweetResults.Result.parse(); tweet != nil {
|
||||
tweets = append(tweets, tweet)
|
||||
}
|
||||
}
|
||||
for _, item := range entry.Content.Items {
|
||||
if item.Item.ItemContent.TweetResults.Result.Typename == "Tweet" {
|
||||
if tweet := item.Item.ItemContent.TweetResults.Result.parse(); tweet != nil {
|
||||
tweets = append(tweets, tweet)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, tweet := range tweets {
|
||||
if tweet.InReplyToStatusID != "" {
|
||||
for _, parentTweet := range tweets {
|
||||
if parentTweet.ID == tweet.InReplyToStatusID {
|
||||
tweet.InReplyToStatus = parentTweet
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return tweets
|
||||
}
|
||||
|
||||
func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet {
|
||||
username := user.ScreenName
|
||||
name := user.Name
|
||||
tweetID := tweet.IDStr
|
||||
tw := &Tweet{
|
||||
ID: tweetID,
|
||||
Likes: tweet.FavoriteCount,
|
||||
Name: name,
|
||||
PermanentURL: fmt.Sprintf("https://twitter.com/%s/status/%s", username, tweetID),
|
||||
Replies: tweet.ReplyCount,
|
||||
Retweets: tweet.RetweetCount,
|
||||
Text: tweet.FullText,
|
||||
UserID: tweet.UserIDStr,
|
||||
Username: username,
|
||||
}
|
||||
|
||||
tm, err := time.Parse(time.RubyDate, tweet.CreatedAt)
|
||||
if err == nil {
|
||||
tw.TimeParsed = tm
|
||||
tw.Timestamp = tm.Unix()
|
||||
}
|
||||
|
||||
if tweet.Place.ID != "" {
|
||||
tw.Place = &tweet.Place
|
||||
}
|
||||
|
||||
if tweet.QuotedStatusIDStr != "" {
|
||||
tw.IsQuoted = true
|
||||
tw.QuotedStatusID = tweet.QuotedStatusIDStr
|
||||
}
|
||||
if tweet.InReplyToStatusIDStr != "" {
|
||||
tw.IsReply = true
|
||||
tw.InReplyToStatusID = tweet.InReplyToStatusIDStr
|
||||
}
|
||||
if tweet.RetweetedStatusIDStr != "" || tweet.RetweetedStatusResult.Result != nil {
|
||||
tw.IsRetweet = true
|
||||
tw.RetweetedStatusID = tweet.RetweetedStatusIDStr
|
||||
if tweet.RetweetedStatusResult.Result != nil {
|
||||
tw.RetweetedStatus = parseLegacyTweet(&tweet.RetweetedStatusResult.Result.Core.UserResults.Result.Legacy, &tweet.RetweetedStatusResult.Result.Legacy)
|
||||
tw.RetweetedStatusID = tw.RetweetedStatus.ID
|
||||
}
|
||||
}
|
||||
|
||||
if tweet.Views.Count != "" {
|
||||
views, viewsErr := strconv.Atoi(tweet.Views.Count)
|
||||
if viewsErr != nil {
|
||||
views = 0
|
||||
}
|
||||
tw.Views = views
|
||||
}
|
||||
|
||||
for _, pinned := range user.PinnedTweetIdsStr {
|
||||
if tweet.IDStr == pinned {
|
||||
tw.IsPin = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
for _, hash := range tweet.Entities.Hashtags {
|
||||
tw.Hashtags = append(tw.Hashtags, hash.Text)
|
||||
}
|
||||
|
||||
for _, mention := range tweet.Entities.UserMentions {
|
||||
tw.Mentions = append(tw.Mentions, Mention{
|
||||
ID: mention.IDStr,
|
||||
Username: mention.ScreenName,
|
||||
Name: mention.Name,
|
||||
})
|
||||
}
|
||||
|
||||
for _, media := range tweet.ExtendedEntities.Media {
|
||||
if media.Type == "photo" {
|
||||
photo := Photo{
|
||||
ID: media.IDStr,
|
||||
URL: media.MediaURLHttps,
|
||||
}
|
||||
|
||||
tw.Photos = append(tw.Photos, photo)
|
||||
} else if media.Type == "video" {
|
||||
video := Video{
|
||||
ID: media.IDStr,
|
||||
Preview: media.MediaURLHttps,
|
||||
}
|
||||
|
||||
maxBitrate := 0
|
||||
for _, variant := range media.VideoInfo.Variants {
|
||||
if variant.Bitrate > maxBitrate {
|
||||
video.URL = strings.TrimSuffix(variant.URL, "?tag=10")
|
||||
maxBitrate = variant.Bitrate
|
||||
}
|
||||
}
|
||||
|
||||
tw.Videos = append(tw.Videos, video)
|
||||
}
|
||||
|
||||
if !tw.SensitiveContent {
|
||||
sensitive := media.ExtSensitiveMediaWarning
|
||||
tw.SensitiveContent = sensitive.AdultContent || sensitive.GraphicViolence || sensitive.Other
|
||||
}
|
||||
}
|
||||
|
||||
for _, url := range tweet.Entities.URLs {
|
||||
tw.URLs = append(tw.URLs, url.ExpandedURL)
|
||||
}
|
||||
|
||||
tw.HTML = tweet.FullText
|
||||
tw.HTML = reHashtag.ReplaceAllStringFunc(tw.HTML, func(hashtag string) string {
|
||||
return fmt.Sprintf(`<a href="https://twitter.com/hashtag/%s">%s</a>`,
|
||||
strings.TrimPrefix(hashtag, "#"),
|
||||
hashtag,
|
||||
)
|
||||
})
|
||||
tw.HTML = reUsername.ReplaceAllStringFunc(tw.HTML, func(username string) string {
|
||||
return fmt.Sprintf(`<a href="https://twitter.com/%s">%s</a>`,
|
||||
strings.TrimPrefix(username, "@"),
|
||||
username,
|
||||
)
|
||||
})
|
||||
var foundedMedia []string
|
||||
tw.HTML = reTwitterURL.ReplaceAllStringFunc(tw.HTML, func(tco string) string {
|
||||
for _, entity := range tweet.Entities.URLs {
|
||||
if tco == entity.URL {
|
||||
return fmt.Sprintf(`<a href="%s">%s</a>`, entity.ExpandedURL, tco)
|
||||
}
|
||||
}
|
||||
for _, entity := range tweet.ExtendedEntities.Media {
|
||||
if tco == entity.URL {
|
||||
foundedMedia = append(foundedMedia, entity.MediaURLHttps)
|
||||
return fmt.Sprintf(`<br><a href="%s"><img src="%s"/></a>`, tco, entity.MediaURLHttps)
|
||||
}
|
||||
}
|
||||
return tco
|
||||
})
|
||||
for _, photo := range tw.Photos {
|
||||
url := photo.URL
|
||||
if stringInSlice(url, foundedMedia) {
|
||||
continue
|
||||
}
|
||||
tw.HTML += fmt.Sprintf(`<br><img src="%s"/>`, url)
|
||||
}
|
||||
for _, video := range tw.Videos {
|
||||
url := video.Preview
|
||||
if stringInSlice(url, foundedMedia) {
|
||||
continue
|
||||
}
|
||||
tw.HTML += fmt.Sprintf(`<br><img src="%s"/>`, url)
|
||||
}
|
||||
tw.HTML = strings.Replace(tw.HTML, "\n", "<br>", -1)
|
||||
return tw
|
||||
}
|
||||
|
|
@ -16,7 +16,7 @@ func (s *Scraper) GetTrends() ([]string, error) {
|
|||
q.Add("entity_tokens", "false")
|
||||
req.URL.RawQuery = q.Encode()
|
||||
|
||||
var jsn timeline
|
||||
var jsn timelineV1
|
||||
curBearerToken := s.bearerToken
|
||||
if curBearerToken != bearerToken2 {
|
||||
s.setBearerToken(bearerToken2)
|
||||
|
|
|
|||
102
tweets.go
102
tweets.go
|
|
@ -3,7 +3,7 @@ package twitterscraper
|
|||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"net/url"
|
||||
)
|
||||
|
||||
// GetTweets returns channel with tweets for a given user.
|
||||
|
|
@ -21,26 +21,59 @@ func (s *Scraper) FetchTweets(user string, maxTweetsNbr int, cursor string) ([]*
|
|||
return s.FetchTweetsByUserID(userID, maxTweetsNbr, cursor)
|
||||
}
|
||||
|
||||
// FetchTweetsByUserID gets tweets for a given userID, via the Twitter frontend API.
|
||||
// FetchTweetsByUserID gets tweets for a given userID, via the Twitter frontend GraphQL API.
|
||||
func (s *Scraper) FetchTweetsByUserID(userID string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) {
|
||||
if maxTweetsNbr > 200 {
|
||||
maxTweetsNbr = 200
|
||||
}
|
||||
|
||||
req, err := s.newRequest("GET", "https://api.twitter.com/2/timeline/profile/"+userID+".json")
|
||||
req, err := s.newRequest("GET", "https://twitter.com/i/api/graphql/UGi7tjRPr-d_U3bCPIko5Q/UserTweets")
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
q := req.URL.Query()
|
||||
q.Add("count", strconv.Itoa(maxTweetsNbr))
|
||||
q.Add("userId", userID)
|
||||
if cursor != "" {
|
||||
q.Add("cursor", cursor)
|
||||
variables := map[string]interface{}{
|
||||
"userId": userID,
|
||||
"count": maxTweetsNbr,
|
||||
"includePromotedContent": false,
|
||||
"withQuickPromoteEligibilityTweetFields": false,
|
||||
"withVoice": true,
|
||||
"withV2Timeline": true,
|
||||
}
|
||||
features := map[string]interface{}{
|
||||
"rweb_lists_timeline_redesign_enabled": true,
|
||||
"responsive_web_graphql_exclude_directive_enabled": true,
|
||||
"verified_phone_label_enabled": false,
|
||||
"creator_subscriptions_tweet_preview_api_enabled": true,
|
||||
"responsive_web_graphql_timeline_navigation_enabled": true,
|
||||
"responsive_web_graphql_skip_user_profile_image_extensions_enabled": false,
|
||||
"tweetypie_unmention_optimization_enabled": true,
|
||||
"vibe_api_enabled": true,
|
||||
"responsive_web_edit_tweet_api_enabled": true,
|
||||
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": true,
|
||||
"view_counts_everywhere_api_enabled": true,
|
||||
"longform_notetweets_consumption_enabled": true,
|
||||
"tweet_awards_web_tipping_enabled": false,
|
||||
"freedom_of_speech_not_reach_fetch_enabled": true,
|
||||
"standardized_nudges_misinfo": true,
|
||||
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": false,
|
||||
"interactive_text_enabled": true,
|
||||
"responsive_web_text_conversations_enabled": false,
|
||||
"longform_notetweets_rich_text_read_enabled": true,
|
||||
"longform_notetweets_inline_media_enabled": false,
|
||||
"responsive_web_enhance_cards_enabled": false,
|
||||
}
|
||||
req.URL.RawQuery = q.Encode()
|
||||
|
||||
var timeline timeline
|
||||
if cursor != "" {
|
||||
variables["cursor"] = cursor
|
||||
}
|
||||
|
||||
query := url.Values{}
|
||||
query.Set("variables", mapToJSONString(variables))
|
||||
query.Set("features", mapToJSONString(features))
|
||||
req.URL.RawQuery = query.Encode()
|
||||
|
||||
var timeline timelineV2
|
||||
err = s.RequestAPI(req, &timeline)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
|
|
@ -52,18 +85,59 @@ func (s *Scraper) FetchTweetsByUserID(userID string, maxTweetsNbr int, cursor st
|
|||
|
||||
// GetTweet gets a single tweet by ID.
|
||||
func (s *Scraper) GetTweet(id string) (*Tweet, error) {
|
||||
req, err := s.newRequest("GET", "https://api.twitter.com/2/timeline/conversation/"+id+".json")
|
||||
req, err := s.newRequest("GET", "https://twitter.com/i/api/graphql/wETHelmSuBQR5r-dgUlPxg/TweetDetail")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var timeline timeline
|
||||
err = s.RequestAPI(req, &timeline)
|
||||
variables := map[string]interface{}{
|
||||
"focalTweetId": id,
|
||||
"referrer": "profile",
|
||||
"with_rux_injections": false,
|
||||
"includePromotedContent": true,
|
||||
"withCommunity": true,
|
||||
"withQuickPromoteEligibilityTweetFields": true,
|
||||
"withBirdwatchNotes": true,
|
||||
"withVoice": true,
|
||||
"withV2Timeline": true,
|
||||
}
|
||||
|
||||
features := map[string]interface{}{
|
||||
"rweb_lists_timeline_redesign_enabled": true,
|
||||
"responsive_web_graphql_exclude_directive_enabled": true,
|
||||
"verified_phone_label_enabled": false,
|
||||
"creator_subscriptions_tweet_preview_api_enabled": true,
|
||||
"responsive_web_graphql_timeline_navigation_enabled": true,
|
||||
"responsive_web_graphql_skip_user_profile_image_extensions_enabled": false,
|
||||
"tweetypie_unmention_optimization_enabled": true,
|
||||
"vibe_api_enabled": true,
|
||||
"responsive_web_edit_tweet_api_enabled": true,
|
||||
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": true,
|
||||
"view_counts_everywhere_api_enabled": true,
|
||||
"longform_notetweets_consumption_enabled": true,
|
||||
"tweet_awards_web_tipping_enabled": false,
|
||||
"freedom_of_speech_not_reach_fetch_enabled": true,
|
||||
"standardized_nudges_misinfo": true,
|
||||
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": false,
|
||||
"interactive_text_enabled": true,
|
||||
"responsive_web_text_conversations_enabled": false,
|
||||
"longform_notetweets_rich_text_read_enabled": true,
|
||||
"longform_notetweets_inline_media_enabled": false,
|
||||
"responsive_web_enhance_cards_enabled": false,
|
||||
}
|
||||
|
||||
query := url.Values{}
|
||||
query.Set("variables", mapToJSONString(variables))
|
||||
query.Set("features", mapToJSONString(features))
|
||||
req.URL.RawQuery = query.Encode()
|
||||
|
||||
var conversation threadedConversation
|
||||
err = s.RequestAPI(req, &conversation)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tweets, _ := timeline.parseTweets()
|
||||
tweets := conversation.parse()
|
||||
for _, tweet := range tweets {
|
||||
if tweet.ID == id {
|
||||
return tweet, nil
|
||||
|
|
|
|||
117
types.go
117
types.go
|
|
@ -25,33 +25,36 @@ type (
|
|||
|
||||
// Tweet type.
|
||||
Tweet struct {
|
||||
Hashtags []string
|
||||
HTML string
|
||||
ID string
|
||||
InReplyToStatus *Tweet
|
||||
IsQuoted bool
|
||||
IsPin bool
|
||||
IsReply bool
|
||||
IsRetweet bool
|
||||
Likes int
|
||||
Name string
|
||||
Mentions []Mention
|
||||
PermanentURL string
|
||||
Photos []Photo
|
||||
Place *Place
|
||||
QuotedStatus *Tweet
|
||||
Replies int
|
||||
Retweets int
|
||||
RetweetedStatus *Tweet
|
||||
Text string
|
||||
TimeParsed time.Time
|
||||
Timestamp int64
|
||||
URLs []string
|
||||
UserID string
|
||||
Username string
|
||||
Videos []Video
|
||||
Views int
|
||||
SensitiveContent bool
|
||||
Hashtags []string
|
||||
HTML string
|
||||
ID string
|
||||
InReplyToStatus *Tweet
|
||||
InReplyToStatusID string
|
||||
IsQuoted bool
|
||||
IsPin bool
|
||||
IsReply bool
|
||||
IsRetweet bool
|
||||
Likes int
|
||||
Name string
|
||||
Mentions []Mention
|
||||
PermanentURL string
|
||||
Photos []Photo
|
||||
Place *Place
|
||||
QuotedStatus *Tweet
|
||||
QuotedStatusID string
|
||||
Replies int
|
||||
Retweets int
|
||||
RetweetedStatus *Tweet
|
||||
RetweetedStatusID string
|
||||
Text string
|
||||
TimeParsed time.Time
|
||||
Timestamp int64
|
||||
URLs []string
|
||||
UserID string
|
||||
Username string
|
||||
Videos []Video
|
||||
Views int
|
||||
SensitiveContent bool
|
||||
}
|
||||
|
||||
// ProfileResult of scraping.
|
||||
|
|
@ -66,6 +69,66 @@ type (
|
|||
Error error
|
||||
}
|
||||
|
||||
// legacyTweet is the classic tweet payload. It is used both as a value of
// the "tweets" map of the legacy timeline and as the "legacy" object of a
// GraphQL tweet node.
legacyTweet struct {
	CreatedAt     string `json:"created_at"`
	FavoriteCount int    `json:"favorite_count"`
	FullText      string `json:"full_text"`

	// Entities lists what was recognised inside the tweet text.
	Entities struct {
		Hashtags []struct {
			Text string `json:"text"`
		} `json:"hashtags"`
		Media []struct {
			MediaURLHttps string `json:"media_url_https"`
			Type          string `json:"type"`
			URL           string `json:"url"`
		} `json:"media"`
		URLs []struct {
			ExpandedURL string `json:"expanded_url"`
			URL         string `json:"url"`
		} `json:"urls"`
		UserMentions []struct {
			IDStr      string `json:"id_str"`
			Name       string `json:"name"`
			ScreenName string `json:"screen_name"`
		} `json:"user_mentions"`
	} `json:"entities"`

	// ExtendedEntities describes attached media in more detail, including
	// sensitivity warnings and video variants.
	ExtendedEntities struct {
		Media []struct {
			IDStr         string `json:"id_str"`
			MediaURLHttps string `json:"media_url_https"`

			ExtSensitiveMediaWarning struct {
				AdultContent    bool `json:"adult_content"`
				GraphicViolence bool `json:"graphic_violence"`
				Other           bool `json:"other"`
			} `json:"ext_sensitive_media_warning"`

			Type string `json:"type"`
			URL  string `json:"url"`

			VideoInfo struct {
				Variants []struct {
					Bitrate int    `json:"bitrate"`
					URL     string `json:"url"`
				} `json:"variants"`
			} `json:"video_info"`
		} `json:"media"`
	} `json:"extended_entities"`

	IDStr                string `json:"id_str"`
	InReplyToStatusIDStr string `json:"in_reply_to_status_id_str"`
	Place                Place  `json:"place"`
	ReplyCount           int    `json:"reply_count"`
	RetweetCount         int    `json:"retweet_count"`
	RetweetedStatusIDStr string `json:"retweeted_status_id_str"`

	// RetweetedStatusResult is the embedded node of the retweeted tweet,
	// nil when the payload does not include one.
	RetweetedStatusResult struct {
		Result *result `json:"result"`
	} `json:"retweeted_status_result"`

	QuotedStatusIDStr string    `json:"quoted_status_id_str"`
	Time              time.Time `json:"time"`
	UserIDStr         string    `json:"user_id_str"`

	// Views is decoded from the "ext_views" key; Count is a decimal string.
	Views struct {
		State string `json:"state"`
		Count string `json:"count"`
	} `json:"ext_views"`
}
|
||||
|
||||
legacyUser struct {
|
||||
CreatedAt string `json:"created_at"`
|
||||
Description string `json:"description"`
|
||||
|
|
|
|||
9
util.go
9
util.go
|
|
@ -2,6 +2,7 @@ package twitterscraper
|
|||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
|
|
@ -186,6 +187,14 @@ func parseProfile(user legacyUser) Profile {
|
|||
return profile
|
||||
}
|
||||
|
||||
// mapToJSONString returns the JSON encoding of data as a string.
// It returns an empty string when data cannot be marshaled.
func mapToJSONString(data map[string]interface{}) string {
	encoded, err := json.Marshal(data)
	if err == nil {
		return string(encoded)
	}
	return ""
}
|
||||
|
||||
func stringInSlice(a string, list []string) bool {
|
||||
for _, b := range list {
|
||||
if b == a {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue