πŸ› Circumvent anti-abuse measures
malted committed Sep 10, 2024
1 parent 3780596 commit 042b51c
Showing 8 changed files with 453 additions and 75 deletions.
56 changes: 56 additions & 0 deletions examples/full_flow.rs
@@ -0,0 +1,56 @@
use airtable_api::Record;
use anyhow::Result;
use chrono::Utc;
use dotenv::var;
use replit_takeout::{
airtable::{self, AirtableSyncedUser, ProcessState},
replit_graphql::ProfileRepls,
};

#[tokio::main]
async fn main() -> Result<()> {
env_logger::init();
dotenv::dotenv().ok();

let token = var("REPLIT_TEST_TOKEN")?;

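// Build a throwaway record so the example can drive the full takeout flow; every field value below is a placeholder.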
let fields = AirtableSyncedUser {
id: 29999230,
token,
username: "malted".into(),
status: ProcessState::Registered,
email: "test@malted.dev".into(),
r2_link: "http://example.com".into(),
failed_ids: "none".into(),
started_at: Some(Utc::now()),
finished_at: None,
repl_count: 0,
file_count: 0,
statistics: vec!["recpWEjc0zLoKEtZP".into()],
};

let mut user = Record {
id: String::new(),
fields,
created_time: None,
};

log::info!("Starting...");
if let Err(err) = ProfileRepls::download(&user.fields.token, user.clone()).await {
log::error!("Error with `{}`'s download: {err:#?}", user.fields.username);

user.fields.status = ProcessState::ErroredMain;
//arst airtable::update_records(vec![user.clone()]).await?;

// user.fields.failed_ids = errored.join(",");

// send_email(
// &user.fields.email,
// "Your Replitβ • export is slightly delayed :/".into(),
// format!("Hey {}, We have run into an issue processing your Replitβ • takeout πŸ₯‘.\nWe will manually review and confirm that all your data is included. If you don't hear back again within a few days email malted@hackclub.com. Sorry for the inconvenience.", user.fields.username),
// )
// .await;
}

Ok(())
}
29 changes: 29 additions & 0 deletions examples/repls.rs
@@ -0,0 +1,29 @@
use std::collections::HashMap;

use anyhow::Result;
use dotenv::var;
use log::error;
use replit_takeout::replit::repls::Repl;

#[tokio::main]
async fn main() -> Result<()> {
env_logger::init();
dotenv::dotenv().ok();

let token = var("REPLIT_TEST_TOKEN")?;

let repls = Repl::fetch(&token, None).await.expect("some repls");
error!("got {} repls", repls.len());

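// Repl IDs should be unique; surface any duplicates the pagination hands back.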
let mut map: HashMap<String, Repl> = HashMap::new();

for repl in repls {
if map.contains_key(&repl.id) {
log::error!("ALREADY CONTAINS {:?}", repl.clone());
}

map.insert(repl.id.clone(), repl);
}

Ok(())
}
33 changes: 33 additions & 0 deletions src/graphql/repls-query.graphql
@@ -0,0 +1,33 @@
query ReplList($path: String!, $starred: Boolean, $after: String) {
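# Walks one folder level: its metadata, child folders, and a cursor-paginated page of repls.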
currentUser {
id
username
replFolderByPath(path: $path) {
ownerId: userId
pathnames
parent {
pathnames
}
folders {
id
name
pathnames
replsCount
folderType
}
repls(starred: $starred, after: $after) {
items {
id
title
isPrivate
slug
url
timeCreated
}
pageInfo {
nextCursor
}
}
}
}
}
1 change: 1 addition & 0 deletions src/lib.rs
@@ -2,6 +2,7 @@ pub mod airtable;
pub mod crosisdownload;
pub mod email;
pub mod r2;
pub mod replit;
pub mod replit_graphql;

pub mod utils {
2 changes: 1 addition & 1 deletion src/main.rs
@@ -223,7 +223,7 @@ async fn airtable_loop() -> Result<()> {
error!("Error with `{}`'s download: {err:#?}", user.fields.username);

user.fields.status = ProcessState::ErroredMain;
airtable::update_records(vec![user.clone()]).await?;
//arst airtable::update_records(vec![user.clone()]).await?;

// user.fields.failed_ids = errored.join(",");

46 changes: 46 additions & 0 deletions src/replit/mod.rs
@@ -0,0 +1,46 @@
use reqwest::{
cookie::Jar,
header::{self, HeaderMap},
Client, Url,
};
use std::sync::Arc;

pub mod repls;

pub static REPLIT_GQL_URL: &str = "https://replit.com/graphql";

pub fn create_client(token: &String, client: Option<Client>) -> Result<Client, reqwest::Error> {
if let Some(client) = client {
return Ok(client);
}

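// A randomised user agent plus browser-like headers and a session cookie make these requests resemble ordinary web traffic.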
Client::builder()
.user_agent(crate::utils::random_user_agent())
.default_headers(create_client_headers())
.cookie_provider(create_client_cookie_jar(token))
.build()
}

fn create_client_headers() -> HeaderMap {
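// Headers that make the request look like an in-browser XHR from replit.com.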
let mut headers = header::HeaderMap::new();
headers.insert(
"X-Requested-With",
header::HeaderValue::from_static("XMLHttpRequest"),
);
headers.insert(
reqwest::header::REFERER,
header::HeaderValue::from_static("https://replit.com/~"),
);

headers
}

fn create_client_cookie_jar(token: &String) -> Arc<Jar> {
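// connect.sid is Replit's session cookie; adding it to the jar authenticates every request this client makes.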
let cookie = &format!("connect.sid={token}; Domain=replit.com");
let url = REPLIT_GQL_URL.parse::<Url>().unwrap();

let jar = Jar::default();
jar.add_cookie_str(cookie, &url);

Arc::new(jar)
}
144 changes: 144 additions & 0 deletions src/replit/repls.rs
@@ -0,0 +1,144 @@
use std::collections::HashSet;

use super::{create_client, REPLIT_GQL_URL};
use anyhow::Result;
use graphql_client::{GraphQLQuery, Response};
use log::{debug, info, trace, warn};
use reqwest::{Client, StatusCode};
use tokio::time::{sleep, Duration};

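// graphql_client needs an in-scope type for the schema's custom DateTime scalar; a plain String suffices here.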
type DateTime = String;
#[derive(GraphQLQuery)]
#[graphql(
schema_path = "src/graphql/schema 7.graphql",
query_path = "src/graphql/repls-query.graphql",
response_derives = "Debug"
)]
struct ReplList;

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Repl {
pub id: String,
pub title: String,
pub slug: String,
pub private: bool,
pub url: String,
pub time_created: String,
}
impl Repl {
pub async fn fetch(token: &str, client_opt: Option<Client>) -> Result<HashSet<Repl>> {
let client = create_client(&token.into(), client_opt)?;
let mut all_repls = HashSet::new();
let mut visited_folder_ids = HashSet::new();

Self::fetch_recursive("", "", &client, &mut all_repls, &mut visited_folder_ids).await?;

info!("got {} repls", all_repls.len());

Ok(all_repls)
}

async fn fetch_recursive(
path: &str,
folder_id: &str,
client: &Client,
all_repls: &mut HashSet<Repl>,
visited_folder_ids: &mut HashSet<String>,
) -> Result<()> {
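// Folders can be reachable more than once while walking the tree; tracking visited IDs avoids cycles and duplicate work.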
if !folder_id.is_empty() && visited_folder_ids.contains(folder_id) {
info!("Skipping already visited folder: {} ({})", path, folder_id);
return Ok(());
}

if !folder_id.is_empty() {
visited_folder_ids.insert(folder_id.to_string());
}

info!("Traversing {} ({folder_id})", path);

let mut cursor = None;
let mut retry_count = 0;
let max_retries = 5;

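// Fetch this folder's repls one page at a time, backing off and retrying on failure.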
loop {
let folder_query = ReplList::build_query(repl_list::Variables {
path: path.to_string(),
starred: None,
after: cursor.clone(),
});

let folder_data = loop {
if retry_count >= max_retries {
return Err(anyhow::anyhow!("Max retries reached for path {path}"));
}

let response = client.post(REPLIT_GQL_URL).json(&folder_query).send().await;

match response {
Ok(res) if res.status() == StatusCode::TOO_MANY_REQUESTS => {
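// 429 means the rate limiter tripped; back off exponentially (1s, 2s, 4s, ...) before retrying.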
let wait_time = Duration::from_secs(2u64.pow(retry_count));
warn!("Rate-limited - waiting {:?} before retrying", wait_time);
sleep(wait_time).await;
retry_count = (retry_count + 1).min(max_retries);
continue;
}
Ok(res) => break res,
Err(e) => {
warn!("Error fetching data: {:?}", e);
let wait_time = Duration::from_secs(2u64.pow(retry_count));
warn!("Waiting {:?} before retrying", wait_time);
sleep(wait_time).await;
retry_count = (retry_count + 1).min(max_retries);

continue;
}
}
};

let folder_data = folder_data.text().await?;

let folder: Response<repl_list::ResponseData> = serde_json::from_str(&folder_data)?;
log::trace!("{path}-{:#?}", folder);

let folder = folder
.data
.and_then(|data| data.current_user)
.and_then(|user| user.repl_folder_by_path)
.ok_or_else(|| anyhow::anyhow!("Failed to get folder data"))?;

// Process subfolders
for subfolder in folder.folders {
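// Recursive async fns yield unsized futures, so the recursive call must be boxed.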
Box::pin(Self::fetch_recursive(
&subfolder.pathnames.join("/"),
&subfolder.id,
client,
all_repls,
visited_folder_ids,
))
.await?;
}

for repl in folder.repls.items {
all_repls.insert(Repl {
id: repl.id,
title: repl.title,
slug: repl.slug,
private: repl.is_private,
url: repl.url,
time_created: repl.time_created,
});
}

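// Brief pause between pages to stay under the rate limit.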
sleep(Duration::from_millis(250)).await;

// Check for next page
match folder.repls.page_info.next_cursor {
Some(next_cursor) => cursor = Some(next_cursor),
None => break,
}
}

Ok(())
}
}