added scheduling & media upload

This commit is contained in:
Valentine 2024-03-09 03:55:39 +03:00
parent f5c1694211
commit 3c23a975da
7 changed files with 431 additions and 7 deletions

View file

@ -31,6 +31,10 @@ You can use this library to get tweets, profiles, and trends trivially.
- [Get trends](#get-trends)
- [Get following](#get-following)
- [Get followers](#get-followers)
- [Get scheduled tweets](#get-scheduled-tweets)
- [Create scheduled tweet](#create-scheduled-tweet)
- [Delete scheduled tweet](#delete-scheduled-tweet)
- [Upload media](#upload-media)
- [Connection](#connection)
- [Proxy](#proxy)
- [HTTP(s)](#https)
@ -345,7 +349,7 @@ trends, err := scraper.GetTrends()
```golang
var cursor string
users, cursor, err := testScraper.FetchFollowing("Support", 20, cursor)
users, cursor, err := scraper.FetchFollowing("Support", 20, cursor)
```
### Get followers
@ -357,7 +361,57 @@ users, cursor, err := testScraper.FetchFollowing("Support", 20, cursor)
```golang
var cursor string
users, cursor, err := testScraper.FetchFollowers("Support", 20, cursor)
users, cursor, err := scraper.FetchFollowers("Support", 20, cursor)
```
### Get scheduled tweets
> [!IMPORTANT]
> Requires authentication!
500 requests / 15 minutes
```golang
tweets, err := scraper.FetchScheduledTweets()
```
### Create scheduled tweet
> [!IMPORTANT]
> Requires authentication!
500 requests / 15 minutes
```golang
id, err := scraper.CreateScheduledTweet(twitterscraper.TweetSchedule{
Text: "New scheduled tweet text",
Date: time.Now().Add(time.Hour * 24 * 31),
Medias: nil,
})
```
### Delete scheduled tweet
> [!IMPORTANT]
> Requires authentication!
500 requests / 15 minutes
```golang
err := scraper.DeleteScheduledTweet("123")
```
### Upload media
> [!IMPORTANT]
> Requires authentication!
50 requests / 15 minutes
Uploads a photo, video, or GIF for later posting or scheduling. The uploaded media expires after 24 hours if it is not used.
```golang
media, err := scraper.UploadMedia("./files/movie.mp4")
```
## Connection

1
go.mod
View file

@ -3,6 +3,7 @@ module github.com/imperatrona/twitter-scraper
go 1.16
require (
github.com/AlexEidt/Vidio v1.5.1
github.com/google/go-cmp v0.5.9
golang.org/x/net v0.17.0
)

2
go.sum
View file

@ -1,3 +1,5 @@
github.com/AlexEidt/Vidio v1.5.1 h1:tovwvtgQagUz1vifiL9OeWkg1fP/XUzFazFKh7tFtaE=
github.com/AlexEidt/Vidio v1.5.1/go.mod h1:djhIMnWMqPrC3X6nB6ymGX6uWWlgw+VayYGKE1bNwmI=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=

View file

@ -6,6 +6,7 @@ import (
"errors"
"io"
"net/url"
"strconv"
"strings"
"time"
)
@ -109,6 +110,12 @@ type scheduleTweets struct {
} `json:"data"`
}
// TweetSchedule describes a tweet to be scheduled with CreateScheduledTweet.
type TweetSchedule struct {
// Text is sent as the "status" of the scheduled tweet.
Text string
// Date is the moment the tweet should be published. CreateScheduledTweet
// rejects values that are not strictly in the future.
Date time.Time
// Medias optionally lists already-uploaded media to attach; their IDs are
// sent as "media_ids". May be nil or empty.
Medias []*Media
}
func (timeline *scheduleTweets) parseTweets() []*ScheduledTweet {
var tweets []*ScheduledTweet
@ -185,8 +192,8 @@ func (s *Scraper) DeleteScheduledTweet(id string) error {
}
// CreateScheduledTweet schedule new tweet.
func (s *Scraper) CreateScheduledTweet(text string, date time.Time) (string, error) {
if date.Unix() <= time.Now().Unix() {
func (s *Scraper) CreateScheduledTweet(schedule TweetSchedule) (string, error) {
if schedule.Date.Unix() <= time.Now().Unix() {
return "", errors.New("date can't be in past")
}
@ -199,14 +206,24 @@ func (s *Scraper) CreateScheduledTweet(text string, date time.Time) (string, err
post_tweet_request := map[string]interface{}{
"auto_populate_reply_metadata": false,
"status": text,
"status": schedule.Text,
"exclude_reply_user_ids": []string{},
"media_ids": []string{},
}
if len(schedule.Medias) > 0 {
var media_ids []string
for _, media := range schedule.Medias {
media_ids = append(media_ids, strconv.Itoa(media.ID))
}
post_tweet_request["media_ids"] = media_ids
}
variables := map[string]interface{}{
"post_tweet_request": post_tweet_request,
"execute_at": date.Unix(),
"execute_at": schedule.Date.Unix(),
}
body := map[string]interface{}{

View file

@ -5,6 +5,8 @@ import (
"fmt"
"testing"
"time"
twitterscraper "github.com/imperatrona/twitter-scraper"
)
func TestFetchScheduledTweets(t *testing.T) {
@ -27,7 +29,12 @@ func TestCreateScheduledTweets(t *testing.T) {
t.Skip("Skipping test due to environment variable")
}
var err error
id, err = testScraper.CreateScheduledTweet("new tweet", time.Now().Add(time.Hour*24*31))
id, err = testScraper.CreateScheduledTweet(twitterscraper.TweetSchedule{
Text: "new tweet",
Date: time.Now().Add(time.Hour * 24 * 31),
Medias: nil,
})
if err != nil {
t.Error(err)
}

230
upload.go Normal file
View file

@ -0,0 +1,230 @@
package twitterscraper
import (
"bytes"
"fmt"
"io"
"log"
"mime/multipart"
"net/http"
"net/url"
"os"
"strconv"
"strings"
"time"
vidio "github.com/AlexEidt/Vidio"
)
// Media describes a file uploaded through UploadMedia.
type Media struct {
// ID is the media_id returned by the upload INIT command.
ID int
// Type is the content type sniffed from the file bytes with
// http.DetectContentType (for example "image/png" or "video/mp4").
Type string
// Size is the length of the file in bytes.
Size int
// Parts is the number of full 2,000,000-byte chunks in the file
// (Size / 2_000_000). uploadAppend iterates segment indices 0 through
// Parts, the final one carrying the remainder.
Parts int
// ExpiresAt is the time of the INIT call plus the expires_after_secs
// value reported by the server.
ExpiresAt time.Time
}
// uploadInitResponse holds the fields of the INIT command's JSON response
// that uploadInit reads.
type uploadInitResponse struct {
// ID is the identifier assigned to the new upload.
ID int `json:"media_id"`
// ExpiresAfter is the number of seconds after which the upload expires.
ExpiresAfter int `json:"expires_after_secs"`
}
// ProcessingInfo is the "processing_info" object decoded from the responses
// of the FINALIZE and STATUS upload commands.
type ProcessingInfo struct {
// State is the processing state; UploadMedia polls until it equals
// "succeeded".
State string `json:"state"`
// CheckAfter is decoded from "check_after_secs" — presumably the delay the
// server suggests before the next STATUS poll (confirm against the API docs).
CheckAfter int `json:"check_after_secs"`
// Progress is decoded from "progress_percent".
Progress int `json:"progress_percent"`
}
// uploadStatusResponse is the envelope decoded by uploadFinalize and
// uploadStatus; only its "processing_info" member is read.
type uploadStatusResponse struct {
ProcessingInfo ProcessingInfo `json:"processing_info"`
}
// UploadMedia uploads a photo, video or gif for further posting or scheduling.
// The uploaded media expires in 24 hours if not used.
//
// The file at filePath is read fully into memory, then sent with the
// INIT / APPEND / FINALIZE command sequence. Images are returned right after
// FINALIZE; for other media the STATUS command is polled until processing
// reports "succeeded". An error is returned if the file cannot be read, any
// request fails, or the server reports that processing failed.
func (s *Scraper) UploadMedia(filePath string) (*Media, error) {
	fileContent, err := os.ReadFile(filePath)
	if err != nil {
		return nil, err
	}

	media, err := s.uploadInit(filePath, fileContent)
	if err != nil {
		return nil, err
	}

	if err = s.uploadAppend(media, fileContent); err != nil {
		return nil, err
	}

	status, err := s.uploadFinalize(media)
	if err != nil {
		return nil, err
	}

	// Images need no server-side processing, so there is nothing to poll.
	if strings.HasPrefix(media.Type, "image") {
		return media, nil
	}

	for status.State != "succeeded" {
		// "failed" is terminal: without this check the loop would poll forever.
		if status.State == "failed" {
			return nil, fmt.Errorf("processing of media %d failed", media.ID)
		}

		// Honor the server's polling hint when present, else wait 2 seconds.
		wait := 2 * time.Second
		if status.CheckAfter > 0 {
			wait = time.Duration(status.CheckAfter) * time.Second
		}
		time.Sleep(wait)

		status, err = s.uploadStatus(media)
		if err != nil {
			return nil, err
		}
	}

	return media, nil
}
// uploadInit issues the INIT upload command for the given file.
//
// The content type is sniffed from fileContent and mapped to a media
// category: jpeg/png -> tweet_image, gif -> tweet_gif, mp4/quicktime ->
// tweet_video. For videos the duration is read from the file at filePath and
// sent along in milliseconds. Any other content type is rejected with an
// error. On success the returned Media carries the server-assigned ID, the
// detected type, the size, the number of full 2,000,000-byte chunks and the
// computed expiry time.
func (s *Scraper) uploadInit(filePath string, fileContent []byte) (*Media, error) {
	contentType := http.DetectContentType(fileContent)
	totalBytes := len(fileContent)

	category := "tweet_"
	var durationSec float64

	switch contentType {
	case "image/jpeg", "image/png":
		category += "image"
	case "image/gif":
		category += "gif"
	case "video/mp4", "video/quicktime":
		category += "video"
		clip, err := vidio.NewVideo(filePath)
		if err != nil {
			return nil, err
		}
		durationSec = clip.Duration()
		clip.Close()
	default:
		return nil, fmt.Errorf("file type %s unsupported by twitter, make sure you uploading photo, video or gif", contentType)
	}

	req, err := s.newRequest("POST", "https://upload.twitter.com/i/media/upload.json")
	if err != nil {
		return nil, err
	}

	// Values.Encode sorts by key, so building the set as a literal yields the
	// same query string as successive Set calls.
	params := url.Values{
		"command":        {"INIT"},
		"total_bytes":    {strconv.Itoa(totalBytes)},
		"media_type":     {contentType},
		"media_category": {category},
	}
	if category == "tweet_video" {
		params.Set("video_duration_ms", strconv.FormatFloat(durationSec*1000, 'f', -1, 64))
	}
	req.URL.RawQuery = params.Encode()

	req.Header.Set("Origin", "https://twitter.com")
	req.Header.Set("Referer", "https://twitter.com/")

	var initResp uploadInitResponse
	if err = s.RequestAPI(req, &initResp); err != nil {
		return nil, err
	}

	lifetime := time.Duration(initResp.ExpiresAfter) * time.Second
	result := &Media{
		ID:        initResp.ID,
		Type:      contentType,
		Size:      totalBytes,
		ExpiresAt: time.Now().Add(lifetime),
		Parts:     totalBytes / 2_000_000,
	}
	return result, nil
}
// uploadAppend sends fileContent to the upload endpoint with the APPEND
// command, one multipart request per chunk of at most 2,000,000 bytes.
// Segments are numbered from 0 in file order. The first error from building
// or sending a segment aborts the upload and is returned.
//
// Chunk boundaries are derived from len(fileContent) itself, so a file whose
// size is an exact multiple of the chunk size does not produce an empty
// trailing segment, and a Media whose Parts disagrees with the data cannot
// cause an out-of-range slice.
func (s *Scraper) uploadAppend(media *Media, fileContent []byte) error {
	const chunkSize = 2_000_000

	for segment, offset := 0, 0; offset < len(fileContent); segment, offset = segment+1, offset+chunkSize {
		end := offset + chunkSize
		if end > len(fileContent) {
			end = len(fileContent)
		}
		partData := fileContent[offset:end]

		var buf bytes.Buffer
		w := multipart.NewWriter(&buf)

		fw, err := w.CreateFormFile("media", "blob")
		if err != nil {
			// Report and return instead of log.Fatal: a library must not
			// terminate the caller's process.
			log.Printf("twitterscraper: creating multipart part for segment %d: %v", segment, err)
			return err
		}

		if _, err = fw.Write(partData); err != nil {
			return err
		}

		// Close writes the terminating boundary; an error here means the
		// body is malformed and must not be sent.
		if err = w.Close(); err != nil {
			return err
		}

		req, err := s.newRequest("POST", "https://upload.twitter.com/i/media/upload.json")
		if err != nil {
			return err
		}

		query := url.Values{}
		query.Set("command", "APPEND")
		query.Set("media_id", strconv.Itoa(media.ID))
		query.Set("segment_index", strconv.Itoa(segment))
		req.URL.RawQuery = query.Encode()

		req.Header.Set("Content-Type", w.FormDataContentType())
		req.Header.Set("Origin", "https://twitter.com")
		req.Header.Set("Referer", "https://twitter.com/")

		req.Body = io.NopCloser(&buf)

		if err = s.RequestAPI(req, nil); err != nil {
			return err
		}
	}

	return nil
}
// uploadFinalize issues the FINALIZE upload command for media, with
// allow_async enabled, and returns the processing_info object decoded from
// the response.
func (s *Scraper) uploadFinalize(media *Media) (*ProcessingInfo, error) {
	req, err := s.newRequest("POST", "https://upload.twitter.com/i/media/upload.json")
	if err != nil {
		return nil, err
	}

	// Values.Encode sorts by key, so the literal produces the same query
	// string as successive Set calls.
	params := url.Values{
		"command":     {"FINALIZE"},
		"media_id":    {strconv.Itoa(media.ID)},
		"allow_async": {"true"},
	}
	req.URL.RawQuery = params.Encode()

	req.Header.Set("Origin", "https://twitter.com")
	req.Header.Set("Referer", "https://twitter.com/")

	var result uploadStatusResponse
	if err = s.RequestAPI(req, &result); err != nil {
		return nil, err
	}
	return &result.ProcessingInfo, nil
}
// uploadStatus issues the STATUS upload command for media and returns the
// processing_info object decoded from the response.
func (s *Scraper) uploadStatus(media *Media) (*ProcessingInfo, error) {
	req, err := s.newRequest("GET", "https://upload.twitter.com/i/media/upload.json")
	if err != nil {
		return nil, err
	}

	// Values.Encode sorts by key, so the literal produces the same query
	// string as successive Set calls.
	params := url.Values{
		"command":  {"STATUS"},
		"media_id": {strconv.Itoa(media.ID)},
	}
	req.URL.RawQuery = params.Encode()

	req.Header.Set("Origin", "https://twitter.com")
	req.Header.Set("Referer", "https://twitter.com/")

	var result uploadStatusResponse
	if err = s.RequestAPI(req, &result); err != nil {
		return nil, err
	}
	return &result.ProcessingInfo, nil
}

113
upload_test.go Normal file
View file

@ -0,0 +1,113 @@
package twitterscraper_test
import (
"io"
"net/http"
"os"
"testing"
)
// TestPhotoUpload downloads a PNG into a temporary file and uploads it with
// UploadMedia, expecting a non-zero media ID. Setup failures abort the test
// immediately so later steps never dereference a nil file, response or media.
func TestPhotoUpload(t *testing.T) {
	if skipAuthTest {
		t.Skip("Skipping test due to environment variable")
	}

	// Create temp file
	f, err := os.CreateTemp("", "tmp_*.png")
	if err != nil {
		t.Fatal(err)
	}
	// Deferred calls run last-in-first-out: close the file before removing it.
	defer os.Remove(f.Name())
	defer f.Close()

	resp, err := http.Get("https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")
	if err != nil {
		t.Fatal(err)
	}
	defer resp.Body.Close()

	// An error page would otherwise be saved and fail later as an unsupported type.
	if resp.StatusCode != http.StatusOK {
		t.Fatalf("downloading fixture: unexpected status %s", resp.Status)
	}

	if _, err = io.Copy(f, resp.Body); err != nil {
		t.Fatal(err)
	}

	media, err := testScraper.UploadMedia(f.Name())
	if err != nil {
		t.Fatal(err)
	}

	if media.ID == 0 {
		t.Error("Media ID shouldn't be 0")
	}
}
// TestVideoUpload downloads an MP4 into a temporary file and uploads it with
// UploadMedia, expecting a non-zero media ID. Setup failures abort the test
// immediately so later steps never dereference a nil file, response or media.
func TestVideoUpload(t *testing.T) {
	if skipAuthTest {
		t.Skip("Skipping test due to environment variable")
	}

	// Create temp file
	f, err := os.CreateTemp("", "tmp_*.mp4")
	if err != nil {
		t.Fatal(err)
	}
	// Deferred calls run last-in-first-out: close the file before removing it.
	defer os.Remove(f.Name())
	defer f.Close()

	resp, err := http.Get("https://github.com/chthomos/video-media-samples/raw/master/big-buck-bunny-480p-30sec.mp4")
	if err != nil {
		t.Fatal(err)
	}
	defer resp.Body.Close()

	// An error page would otherwise be saved and fail later as an unsupported type.
	if resp.StatusCode != http.StatusOK {
		t.Fatalf("downloading fixture: unexpected status %s", resp.Status)
	}

	if _, err = io.Copy(f, resp.Body); err != nil {
		t.Fatal(err)
	}

	media, err := testScraper.UploadMedia(f.Name())
	if err != nil {
		t.Fatal(err)
	}

	if media.ID == 0 {
		t.Error("Media ID shouldn't be 0")
	}
}
// TestGifUpload downloads a GIF into a temporary file and uploads it with
// UploadMedia, expecting a non-zero media ID. Setup failures abort the test
// immediately so later steps never dereference a nil file, response or media.
func TestGifUpload(t *testing.T) {
	if skipAuthTest {
		t.Skip("Skipping test due to environment variable")
	}

	// Create temp file
	f, err := os.CreateTemp("", "tmp_*.gif")
	if err != nil {
		t.Fatal(err)
	}
	// Deferred calls run last-in-first-out: close the file before removing it.
	defer os.Remove(f.Name())
	defer f.Close()

	resp, err := http.Get("https://i.giphy.com/dNKC0e3QFNPZC.gif")
	if err != nil {
		t.Fatal(err)
	}
	defer resp.Body.Close()

	// An error page would otherwise be saved and fail later as an unsupported type.
	if resp.StatusCode != http.StatusOK {
		t.Fatalf("downloading fixture: unexpected status %s", resp.Status)
	}

	if _, err = io.Copy(f, resp.Body); err != nil {
		t.Fatal(err)
	}

	media, err := testScraper.UploadMedia(f.Name())
	if err != nil {
		t.Fatal(err)
	}

	if media.ID == 0 {
		t.Error("Media ID shouldn't be 0")
	}
}