Skip to content

Commit

Permalink
Capture browser url (#1381)
Browse files Browse the repository at this point in the history
* capture url browser

* use applescript and only support macOS for now

* fix: conditionally get browser URL only on macOS

* add changes

* rewrite use accessibility api

* Remove unused dependency

* Add AppleScript support for Arc browser

* move deps to macos
  • Loading branch information
neo773 authored Feb 19, 2025
1 parent 50d86a5 commit 8ac04bb
Show file tree
Hide file tree
Showing 14 changed files with 244 additions and 23 deletions.
2 changes: 1 addition & 1 deletion screenpipe-server/benches/db_benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ async fn setup_large_db(size: usize) -> DatabaseManager {
.insert_video_chunk("test_video.mp4", "test_device")
.await
.unwrap();
let frame_id = db.insert_frame("test_device", None).await.unwrap();
let frame_id = db.insert_frame("test_device", None, None).await.unwrap();
let ocr_text = format!("OCR text {}", rng.gen::<u32>());
let text_json = format!(r#"{{"text": "{}"}}"#, ocr_text);
db.insert_ocr_text(
Expand Down
2 changes: 1 addition & 1 deletion screenpipe-server/src/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ async fn record_video(
while is_running.load(Ordering::SeqCst) {
if let Some(frame) = video_capture.ocr_frame_queue.pop() {
for window_result in &frame.window_ocr_results {
match db.insert_frame(&device_name, None).await {
match db.insert_frame(&device_name, None, window_result.browser_url.as_deref()).await {
Ok(frame_id) => {
let text_json =
serde_json::to_string(&window_result.text_json).unwrap_or_default();
Expand Down
6 changes: 4 additions & 2 deletions screenpipe-server/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ impl DatabaseManager {
&self,
device_name: &str,
timestamp: Option<DateTime<Utc>>,
browser_url: Option<&str>,
) -> Result<i64, sqlx::Error> {
let mut tx = self.pool.begin().await?;
debug!("insert_frame Transaction started");
Expand Down Expand Up @@ -328,12 +329,13 @@ impl DatabaseManager {

// Insert the new frame with file_path as name
let id = sqlx::query(
"INSERT INTO frames (video_chunk_id, offset_index, timestamp, name) VALUES (?1, ?2, ?3, ?4)",
"INSERT INTO frames (video_chunk_id, offset_index, timestamp, name, browser_url) VALUES (?1, ?2, ?3, ?4, ?5)",
)
.bind(video_chunk_id)
.bind(offset_index)
.bind(timestamp)
.bind(file_path)
.bind(browser_url.map(|s| s.to_string()))
.execute(&mut *tx)
.await?
.last_insert_rowid();
Expand Down Expand Up @@ -400,7 +402,7 @@ impl DatabaseManager {
"Failed to insert OCR text for frame_id: {} after {} attempts",
frame_id, MAX_RETRIES
);
return Err(sqlx::Error::PoolTimedOut); // Return error after max retries
return Err(sqlx::Error::PoolTimedOut);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Add migration script here
ALTER TABLE frames ADD COLUMN browser_url TEXT DEFAULT NULL;
2 changes: 1 addition & 1 deletion screenpipe-server/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1106,7 +1106,7 @@ async fn add_frame_to_db(
let db = &state.db;

let frame_id = db
.insert_frame(device_name, Some(frame.timestamp.unwrap_or_else(Utc::now)))
.insert_frame(device_name, Some(frame.timestamp.unwrap_or_else(Utc::now)), None)
.await?;

if let Some(ocr_results) = &frame.ocr_results {
Expand Down
16 changes: 8 additions & 8 deletions screenpipe-server/tests/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ mod tests {
.insert_video_chunk("test_video.mp4", "test_device")
.await
.unwrap();
let frame_id = db.insert_frame("test_device", None).await.unwrap();
let frame_id = db.insert_frame("test_device", None, None).await.unwrap();
db.insert_ocr_text(
frame_id,
"Hello, world!",
Expand Down Expand Up @@ -200,7 +200,7 @@ mod tests {
.insert_video_chunk("test_video.mp4", "test_device")
.await
.unwrap();
let frame_id = db.insert_frame("test_device", None).await.unwrap();
let frame_id = db.insert_frame("test_device", None, None).await.unwrap();
db.insert_ocr_text(
frame_id,
"Hello from OCR",
Expand Down Expand Up @@ -290,7 +290,7 @@ mod tests {
.insert_video_chunk("test_video.mp4", "test_device")
.await
.unwrap();
let frame_id1 = db.insert_frame("test_device", None).await.unwrap();
let frame_id1 = db.insert_frame("test_device", None, None).await.unwrap();
db.insert_ocr_text(
frame_id1,
"Hello from OCR 1",
Expand Down Expand Up @@ -327,7 +327,7 @@ mod tests {
tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;

// Insert remaining data
let frame_id2 = db.insert_frame("test_device", None).await.unwrap();
let frame_id2 = db.insert_frame("test_device", None, None).await.unwrap();
db.insert_ocr_text(
frame_id2,
"Hello from OCR 2",
Expand Down Expand Up @@ -477,7 +477,7 @@ mod tests {
.insert_video_chunk("test_video.mp4", "test_device")
.await
.unwrap();
let frame_id1 = db.insert_frame("test_device", None).await.unwrap();
let frame_id1 = db.insert_frame("test_device", None, None).await.unwrap();
db.insert_ocr_text(
frame_id1,
"Hello from OCR 1",
Expand Down Expand Up @@ -512,7 +512,7 @@ mod tests {
tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;

// Insert remaining data
let frame_id2 = db.insert_frame("test_device", None).await.unwrap();
let frame_id2 = db.insert_frame("test_device", None, None).await.unwrap();
db.insert_ocr_text(
frame_id2,
"Hello from OCR 2",
Expand Down Expand Up @@ -920,7 +920,7 @@ mod tests {
.unwrap();

// Insert first frame with OCR
let frame_id1 = db.insert_frame("test_device", None).await.unwrap();
let frame_id1 = db.insert_frame("test_device", None, None).await.unwrap();
db.insert_ocr_text(
frame_id1,
"Hello from frame 1",
Expand All @@ -934,7 +934,7 @@ mod tests {
.unwrap();

// Insert second frame with OCR
let frame_id2 = db.insert_frame("test_device", None).await.unwrap();
let frame_id2 = db.insert_frame("test_device", None, None).await.unwrap();
db.insert_ocr_text(
frame_id2,
"Hello from frame 2",
Expand Down
10 changes: 5 additions & 5 deletions screenpipe-server/tests/endpoint_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,8 @@ mod tests {
.insert_video_chunk("test_video1.mp4", "test_device")
.await
.unwrap();
let frame_id1 = db.insert_frame("test_device", None).await.unwrap();
let frame_id2 = db.insert_frame("test_device", None).await.unwrap();
let frame_id1 = db.insert_frame("test_device", None, None).await.unwrap();
let frame_id2 = db.insert_frame("test_device", None, None).await.unwrap();
db.insert_ocr_text(
frame_id1,
"This is a test OCR text", // 21 chars
Expand Down Expand Up @@ -363,7 +363,7 @@ mod tests {
.insert_video_chunk("test_video1.mp4", "test_device")
.await
.unwrap();
let frame_id1 = db.insert_frame("test_device", None).await.unwrap();
let frame_id1 = db.insert_frame("test_device", None, None).await.unwrap();
let audio_chunk_id1 = db.insert_audio_chunk("test_audio1.wav").await.unwrap();

let now = DateTime::parse_from_rfc3339("2024-09-21T10:49:23.240367Z")
Expand Down Expand Up @@ -551,14 +551,14 @@ mod tests {
.insert_video_chunk("old_video.mp4", "test_device")
.await
.unwrap();
let old_frame_id = db.insert_frame("test_device", None).await.unwrap();
let old_frame_id = db.insert_frame("test_device", None, None).await.unwrap();

// Insert recent data
let _ = db
.insert_video_chunk("recent_video.mp4", "test_device")
.await
.unwrap();
let recent_frame_id = db.insert_frame("test_device", None).await.unwrap();
let recent_frame_id = db.insert_frame("test_device", None, None).await.unwrap();

// Insert OCR data with different timestamps
sqlx::query("UPDATE frames SET timestamp = ? WHERE id = ?")
Expand Down
2 changes: 1 addition & 1 deletion screenpipe-server/tests/tags_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ async fn insert_test_data(db: &Arc<DatabaseManager>) {
.unwrap();

// Insert test frame
let frame_id = db.insert_frame("test_device", None).await.unwrap();
let frame_id = db.insert_frame("test_device", None, None).await.unwrap();

// Insert test OCR data
db.insert_ocr_text(
Expand Down
5 changes: 4 additions & 1 deletion screenpipe-vision/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ xcap = "0.2.2"
[target.'cfg(target_os = "macos")'.dependencies]
libc = "=0.2.164"
cidre = { git = "https://github.com/yury/cidre.git", version = "0.5.0" }
accessibility-sys = "0.1.3"
core-foundation = "0.9.4"
url = "2.5.0"

[target.'cfg(target_os = "linux")'.dependencies]
libc = "=0.2.164"
libc = "=0.2.164"
137 changes: 137 additions & 0 deletions screenpipe-vision/src/browser_utils/macos.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
use accessibility_sys::{
kAXChildrenAttribute, kAXFocusedWindowAttribute, kAXRoleAttribute, kAXTextFieldRole,
kAXValueAttribute, AXUIElementCopyAttributeValue, AXUIElementCreateApplication,
AXUIElementRef,
};
use anyhow::Result;
use core_foundation::{
base::{CFTypeRef, TCFType},
string::CFString,
array::CFArray,
};
use url::Url;

use super::BrowserUrlDetector;

pub struct MacOSUrlDetector;

impl MacOSUrlDetector {
pub fn new() -> Self {
Self
}

unsafe fn find_url_field(&self, element: AXUIElementRef) -> Option<AXUIElementRef> {
let mut role: CFTypeRef = std::ptr::null_mut();
let status = AXUIElementCopyAttributeValue(
element,
CFString::from_static_string(kAXRoleAttribute).as_concrete_TypeRef(),
&mut role,
);

if status == accessibility_sys::kAXErrorSuccess {
let cf_role = CFString::wrap_under_get_rule(role as _);
let role_str = cf_role.to_string();

if role_str == kAXTextFieldRole {
let mut value: CFTypeRef = std::ptr::null_mut();
let status = AXUIElementCopyAttributeValue(
element,
CFString::from_static_string(kAXValueAttribute).as_concrete_TypeRef(),
&mut value,
);

if status == accessibility_sys::kAXErrorSuccess {
let url_str = CFString::wrap_under_get_rule(value as _).to_string();
let url_to_parse = if !url_str.starts_with("http://") && !url_str.starts_with("https://") {
format!("https://{}", url_str)
} else {
url_str
};

if Url::parse(&url_to_parse).is_ok() {
return Some(element);
}
}
}
}

let mut children: CFTypeRef = std::ptr::null_mut();
let status = AXUIElementCopyAttributeValue(
element,
CFString::from_static_string(kAXChildrenAttribute).as_concrete_TypeRef(),
&mut children,
);

if status == accessibility_sys::kAXErrorSuccess {
let children_array = CFArray::<*const std::ffi::c_void>::wrap_under_get_rule(children as _);
for child in children_array.iter() {
if let Some(found) = self.find_url_field(*child as AXUIElementRef) {
return Some(found);
}
}
}

None
}

fn get_url_via_applescript(&self, script: &str) -> Result<Option<String>> {
let output = std::process::Command::new("osascript")
.arg("-e")
.arg(script)
.output()?;

if output.status.success() {
let url = String::from_utf8(output.stdout)?.trim().to_string();
return Ok(Some(url));
}
Ok(None)
}

fn get_url_via_accessibility(&self, process_id: i32) -> Result<Option<String>> {
unsafe {
let app_element = AXUIElementCreateApplication(process_id);

let mut focused_window: CFTypeRef = std::ptr::null_mut();
let status = AXUIElementCopyAttributeValue(
app_element,
CFString::from_static_string(kAXFocusedWindowAttribute).as_concrete_TypeRef(),
&mut focused_window,
);

if status != accessibility_sys::kAXErrorSuccess {
return Ok(None);
}

let window_ref = focused_window as AXUIElementRef;
let address_bar = match self.find_url_field(window_ref) {
Some(bar) => bar,
None => return Ok(None),
};

let mut url_value: CFTypeRef = std::ptr::null_mut();
let status = AXUIElementCopyAttributeValue(
address_bar,
CFString::from_static_string(kAXValueAttribute).as_concrete_TypeRef(),
&mut url_value,
);

if status == accessibility_sys::kAXErrorSuccess {
let url = CFString::wrap_under_get_rule(url_value as _).to_string();
Ok(Some(url))
} else {
Ok(None)
}
}
}
}

impl BrowserUrlDetector for MacOSUrlDetector {
    /// Returns the URL of the active tab for `app_name`.
    ///
    /// Arc is asked directly through AppleScript; any other application is
    /// inspected through the Accessibility API using `process_id`.
    fn get_active_url(&self, app_name: &str, process_id: i32) -> Result<Option<String>> {
        match app_name {
            "Arc" => self.get_url_via_applescript(
                r#"tell application "Arc" to return URL of active tab of front window"#,
            ),
            _ => self.get_url_via_accessibility(process_id),
        }
    }
}
36 changes: 36 additions & 0 deletions screenpipe-vision/src/browser_utils/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
use anyhow::Result;

/// Platform abstraction for reading the URL currently shown by a browser.
pub trait BrowserUrlDetector {
/// Looks up the active URL of the application identified by `app_name`
/// and `process_id`.
///
/// Returns `Ok(Some(url))` when a URL was found and `Ok(None)` when none
/// is available (the unsupported-platform implementation always returns
/// `Ok(None)`).
fn get_active_url(&self, app_name: &str, process_id: i32) -> Result<Option<String>>;
}

/// Builds the URL detector for the current target OS.
///
/// macOS gets [`MacOSUrlDetector`]; every other target gets
/// [`UnsupportedUrlDetector`]. Exactly one of the two cfg-gated blocks
/// survives compilation and becomes the function's tail expression.
pub fn create_url_detector() -> Box<dyn BrowserUrlDetector> {
    #[cfg(target_os = "macos")]
    {
        Box::new(MacOSUrlDetector::new())
    }

    #[cfg(not(target_os = "macos"))]
    {
        Box::new(UnsupportedUrlDetector::new())
    }
}

/// Fallback detector for platforms without browser URL support.
///
/// Derives `Default` so that `new()` has the matching trait impl (clippy
/// `new_without_default`) and `Debug` so the public type can be logged.
#[derive(Debug, Default, Clone, Copy)]
pub struct UnsupportedUrlDetector;

impl UnsupportedUrlDetector {
    /// Creates the fallback detector. The type carries no state.
    pub fn new() -> Self {
        Self
    }
}

impl BrowserUrlDetector for UnsupportedUrlDetector {
/// Always reports that no URL is available; both arguments are ignored.
fn get_active_url(&self, _app_name: &str, _process_id: i32) -> Result<Option<String>> {
Ok(None)
}
}

// Re-export MacOS implementation
#[cfg(target_os = "macos")]
mod macos;
#[cfg(target_os = "macos")]
pub use macos::MacOSUrlDetector;
Loading

0 comments on commit 8ac04bb

Please sign in to comment.