In [None]:
use anyhow::{Context, Result};
use lopdf;
use regex::Regex;
use std::collections::{HashMap, HashSet};
use std::env;
use std::io::{self, Write};
use rust_stemmers::{Algorithm, Stemmer};

/// Arabic stop words: high-frequency conjunctions, prepositions, pronouns and
/// demonstratives. `NLPProcessor` loads this list and drops any token that
/// matches one of these entries exactly before stemming.
const ARABIC_STOP_WORDS: &[&str] = &[
    "و", "في", "من", "إلى", "على", "أن", "لا", "ما", "هذا", "هذه", "ذلك", "هؤلاء",
    "إذا", "إن", "كان", "يكون", "عن", "مع", "هو", "هي", "هم", "ب", "ك", "ل", "يا",
];

/// Turns raw text into a list of stemmed, stop-word-free tokens.
struct NLPProcessor {
    /// Stemmer applied to every surviving token.
    stemmer: Stemmer,
    /// Tokens that are discarded before stemming.
    stop_words: HashSet<String>,
}

impl NLPProcessor {
    /// Creates a processor backed by the Arabic stemming algorithm and the
    /// entries of `ARABIC_STOP_WORDS`.
    fn new() -> Self {
        let stop_words: HashSet<String> = ARABIC_STOP_WORDS
            .iter()
            .map(|&entry| entry.to_owned())
            .collect();
        let stemmer = Stemmer::create(Algorithm::Arabic);

        Self { stemmer, stop_words }
    }

    /// Tokenises `text` on whitespace and returns the stem of every token
    /// that is neither empty nor a stop word.
    ///
    /// Each token has leading/trailing non-alphanumeric characters removed
    /// and is lower-cased before the stop-word check and stemming.
    fn process_text(&self, text: &str) -> Vec<String> {
        let mut stems = Vec::new();

        for raw in text.split_whitespace() {
            let token = raw
                .trim_matches(|ch: char| !ch.is_alphanumeric())
                .to_lowercase();

            if token.is_empty() || self.stop_words.contains(&token) {
                continue;
            }

            stems.push(self.stemmer.stem(&token).to_string());
        }

        stems
    }
}

/// Extracts the text of the PDF at `path`, one page per line.
///
/// Whitespace inside each page (including the line breaks produced by the
/// extractor) is collapsed to single spaces, and pages are joined with `'\n'`.
/// Splitting the result on `'\n'` therefore yields one entry per page, which
/// is what the search corpus is built from. Pages whose text cannot be
/// extracted, or that contain no text, are skipped (best effort).
///
/// # Errors
///
/// Returns an error if the document cannot be loaded or parsed.
fn extract_text_from_pdf(path: &str) -> Result<String> {
    let doc = lopdf::Document::load(path).context("Failed to load PDF document")?;

    // Compiled once for the whole document rather than per page.
    let whitespace = Regex::new(r"\s+").expect("static whitespace pattern is valid");

    // `get_pages` is keyed by the real (u32) page numbers; use those keys
    // instead of assuming a contiguous 1..=len range of `usize`.
    let page_map = doc.get_pages();
    let mut pages: Vec<String> = Vec::with_capacity(page_map.len());

    for &page_number in page_map.keys() {
        if let Ok(page_text) = doc.extract_text(&[page_number]) {
            // Normalise per page so the page separator added below survives.
            let normalized = whitespace.replace_all(page_text.trim(), " ");
            if !normalized.is_empty() {
                pages.push(normalized.into_owned());
            }
        }
    }

    Ok(pages.join("\n"))
}

fn semantic_search(query: &str, corpus: &[&str], nlp: &NLPProcessor) -> Vec<(usize, f32)> {
    let query_terms = nlp.process_text(query);

    let mut tfidf_scores = HashMap::new();
    for (doc_id, doc) in corpus.iter().enumerate() {
        let doc_terms = nlp.process_text(doc);
        let mut score = 0.0_f32;

        for term in &query_terms {
            let tf = doc_terms.iter().filter(|&t| t == term).count() as f32;
            let idf = (corpus.len() as f32 / (1.0 + corpus.iter()
                .filter(|&&d| nlp.process_text(d).contains(term))
                .count() as f32).ln_1p();

            score += tf * idf;
        }

        if score > 0.0 {
            tfidf_scores.insert(doc_id, score);
        }
    }

    let mut results: Vec<_> = tfidf_scores.into_iter().collect();
    results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
    results
}

/// Which kind of search a line of user input requests.
enum SearchMode {
    /// Exact substring match, requested with the `/k` prefix.
    Keyword,
    /// TF-IDF ranking over stemmed terms (the default).
    Semantic,
}

/// Splits an already-trimmed input line into its search mode and query text.
///
/// `"/k <text>"` selects keyword search for `<text>`; a bare `"/k"` selects
/// keyword search with an empty query; anything else is a semantic query.
fn parse_search_command(input: &str) -> (SearchMode, &str) {
    if input == "/k" {
        (SearchMode::Keyword, "")
    } else if let Some(rest) = input.strip_prefix("/k ") {
        (SearchMode::Keyword, rest.trim())
    } else {
        (SearchMode::Semantic, input)
    }
}

/// Returns `query` with the synonyms of every known word appended after it.
///
/// Matching is per whitespace-separated word, and the original word is kept,
/// so the expanded query searches for the word and all of its alternatives.
fn expand_synonyms(query: &str, synonyms: &HashMap<&str, Vec<&str>>) -> String {
    let mut expanded: Vec<&str> = Vec::new();
    for word in query.split_whitespace() {
        expanded.push(word);
        if let Some(alternatives) = synonyms.get(word) {
            expanded.extend(alternatives.iter().copied());
        }
    }
    expanded.join(" ")
}

/// Runs the interactive search prompt over `pdf_text`.
///
/// Every non-blank line of `pdf_text` is one searchable document. The loop
/// reads queries from stdin until the user types `خروج`, stdin reaches end of
/// input, or a terminal I/O error occurs. Lines starting with `/k ` run an
/// exact substring search; all other lines run a synonym-expanded semantic
/// search and print the three best matches.
fn chat_loop(pdf_text: &str) {
    let nlp = NLPProcessor::new();
    // Blank lines hold nothing searchable and would only inflate the corpus
    // size used for IDF.
    let corpus: Vec<&str> = pdf_text
        .split('\n')
        .filter(|entry| !entry.trim().is_empty())
        .collect();

    let mut synonyms: HashMap<&str, Vec<&str>> = HashMap::new();
    synonyms.insert("برنامج", vec!["تطبيق", "سوفت وير"]);
    synonyms.insert("ملف", vec!["وثيقة", "مستند"]);

    println!("مرحبًا! أدخل استفسارك أو اكتب 'خروج' للمغادرة.");
    println!("خيارات البحث:");
    println!("1. بحث بالكلمات الدقيقة (ابدأ ب /k)");
    println!("2. بحث دلالي (افتراضي)");

    let stdin = io::stdin();
    let mut buffer = String::new();

    loop {
        print!("> ");
        if io::stdout().flush().is_err() {
            // stdout is gone; there is nobody left to answer.
            break;
        }

        buffer.clear();
        match stdin.read_line(&mut buffer) {
            // `Ok(0)` means end of input (closed pipe / Ctrl-D); without this
            // check the loop would spin forever on empty reads.
            Ok(0) => {
                println!();
                break;
            }
            Ok(_) => {}
            Err(err) => {
                eprintln!("Failed to read input: {}", err);
                break;
            }
        }

        let input = buffer.trim();
        if input == "خروج" {
            println!("مع السلامة!");
            break;
        }

        let (mode, query) = parse_search_command(input);
        if query.is_empty() {
            // An empty keyword query would match every line; just re-prompt.
            continue;
        }

        match mode {
            SearchMode::Keyword => {
                let results: Vec<&str> = corpus
                    .iter()
                    .copied()
                    .filter(|entry| entry.contains(query))
                    .collect();

                print_results(&results);
            }
            SearchMode::Semantic => {
                let expanded_query = expand_synonyms(query, &synonyms);
                let results = semantic_search(&expanded_query, &corpus, &nlp);

                if results.is_empty() {
                    println!("لم أجد نتائج ذات صلة.");
                } else {
                    println!("النتائج الدلالية الأكثر صلة:");
                    for (rank, &(doc_id, score)) in results.iter().take(3).enumerate() {
                        println!("{}. [Score: {:.2}] {}", rank + 1, score, corpus[doc_id]);
                    }
                }
            }
        }
    }
}

/// Prints the total number of keyword matches followed by at most the first
/// five of them as a numbered list, or a "no results" message when `results`
/// is empty.
fn print_results(results: &[&str]) {
    if results.is_empty() {
        println!("لا توجد نتائج.");
        return;
    }

    println!("عدد النتائج: {}", results.len());

    let shown = &results[..results.len().min(5)];
    for (rank, hit) in (1..).zip(shown) {
        println!("{}. {}", rank, hit);
    }
}

/// Entry point: extracts the text of every PDF given on the command line,
/// concatenates it (one trailing newline per file) and starts the interactive
/// search prompt.
///
/// Prints a usage message and exits with status 1 when no PDF path is given.
/// Propagates the error of the first PDF that fails to load.
fn main() -> Result<()> {
    let argv: Vec<String> = env::args().collect();

    let pdf_paths = match argv.get(1..) {
        Some(paths) if !paths.is_empty() => paths,
        _ => {
            eprintln!("الاستخدام: {} <ملف1.pdf> [ملف2.pdf ...]", argv[0]);
            std::process::exit(1);
        }
    };

    let mut corpus_text = String::new();
    for pdf_path in pdf_paths {
        let extracted = extract_text_from_pdf(pdf_path)?;
        corpus_text.push_str(&extracted);
        corpus_text.push('\n');
    }

    chat_loop(&corpus_text);
    Ok(())
}

In [1]:
!curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

[1minfo:[0m downloading installer
[0m[1minfo: [0mprofile set to 'default'
[0m[1minfo: [0mdefault host triple is x86_64-unknown-linux-gnu
[0m[1minfo: [0msyncing channel updates for 'stable-x86_64-unknown-linux-gnu'
[0m[1minfo: [0mlatest update on 2025-04-03, rust version 1.86.0 (05f9846f8 2025-03-31)
[0m[1minfo: [0mdownloading component 'cargo'
[0m[1minfo: [0mdownloading component 'clippy'
[0m[1minfo: [0mdownloading component 'rust-docs'
[0m[1minfo: [0mdownloading component 'rust-std'
 27.1 MiB /  27.1 MiB (100 %)  26.6 MiB/s in  1s
[0m[1minfo: [0mdownloading component 'rustc'
 72.8 MiB /  72.8 MiB (100 %)  33.1 MiB/s in  2s
[0m[1minfo: [0mdownloading component 'rustfmt'
[0m[1minfo: [0minstalling component 'cargo'
  8.8 MiB /   8.8 MiB (100 %)   7.4 MiB/s in  1s
[0m[1minfo: [0minstalling component 'clippy'
[0m[1minfo: [0minstalling component 'rust-docs'
 21.2 MiB /  21.2 MiB (100 %)   2.7 MiB/s in  8s
[0m[1minfo: [0minstalling component 'rust-

In [2]:
import os

# Make cargo/rustc visible to later cells. "$HOME" is NOT expanded inside a
# Python string, so the home directory has to be resolved explicitly;
# otherwise a literal "$HOME/.cargo/bin" entry ends up on PATH.
cargo_bin = os.path.join(os.path.expanduser("~"), ".cargo", "bin")
current_path = os.environ.get("PATH", "")
if cargo_bin not in current_path.split(os.pathsep):
    os.environ["PATH"] = f"{current_path}{os.pathsep}{cargo_bin}" if current_path else cargo_bin

In [3]:
# تثبيت Rust
!curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

# Update PATH so that cargo/rustc are available to the following cells.
import os
home_dir = os.path.expanduser("~")  # resolve the real home directory ($HOME)
cargo_bin_path = os.path.join(home_dir, ".cargo", "bin")
current_path = os.environ.get("PATH", "")  # tolerate an unset PATH
# Compare against whole PATH entries: a substring test would also match
# unrelated entries that merely contain this path as a prefix.
if cargo_bin_path not in current_path.split(os.pathsep):
    os.environ['PATH'] = f"{cargo_bin_path}{os.pathsep}{current_path}" if current_path else cargo_bin_path

print("PATH updated.")
!echo $PATH # للتحقق

[1minfo:[0m downloading installer
[0m[1m[33mwarn: [0mIt looks like you have an existing rustup settings file at:
[0m[1m[33mwarn: [0m/root/.rustup/settings.toml
[0m[1m[33mwarn: [0mRustup will install the default toolchain as specified in the settings file,
[0m[1m[33mwarn: [0minstead of the one inferred from the default host triple.
[0m[1minfo: [0mprofile set to 'default'
[0m[1minfo: [0mdefault host triple is x86_64-unknown-linux-gnu
[0m[1m[33mwarn: [0mUpdating existing toolchain, profile choice will be ignored
[0m[1minfo: [0msyncing channel updates for 'stable-x86_64-unknown-linux-gnu'
[0m[1minfo: [0mdefault toolchain set to 'stable-x86_64-unknown-linux-gnu'

  [0m[1mstable-x86_64-unknown-linux-gnu unchanged[0m - rustc 1.86.0 (05f9846f8 2025-03-31)

[0m[1m
Rust is installed now. Great!
[0m
To get started you may need to restart your current shell.
This would reload your [0m[1mPATH[0m environment variable to include
Cargo's bin directory ($HOME/

In [4]:
!which cargo

/root/.cargo/bin/cargo


In [5]:
!cargo --version

cargo 1.86.0 (adf9b6ad1 2025-02-28)


In [6]:
!rustc --version
!cargo --version

rustc 1.86.0 (05f9846f8 2025-03-31)
cargo 1.86.0 (adf9b6ad1 2025-02-28)


In [7]:
!sudo apt-get install -y cmake libfontconfig1-dev

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libfontconfig1-dev is already the newest version (2.13.1-4.2ubuntu5).
libfontconfig1-dev set to manually installed.
cmake is already the newest version (3.22.1-1ubuntu1.22.04.2).
0 upgraded, 0 newly installed, 0 to remove and 34 not upgraded.


In [None]:
إليك دليل تفصيلي لبدء مشروع بلغة Rust مع أمثلة عملية:

1. تثبيت الأدوات المطلوبة
```bash
# تثبيت Rust (لينكس/ماك)
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh

# للتحديث لآخر نسخة
rustup update
```
2. إنشاء مشروع جديد
```bash
# إنشاء مشروع ثنائي (برنامج قابل للتشغيل)
cargo new my_project --bin

# إنشاء مشروع مكتبة
cargo new my_lib --lib
```
3. هيكل الملفات الأساسي
```
my_project/
├── Cargo.toml    # إعدادات المشروع والتبعيات
├── src/
│   └── main.rs   # نقطة الدخول الرئيسية
└── tests/        # ملفات الاختبارات (اختياري)
```
4. ملف Cargo.toml نموذجي
```toml
[package]
name = "my_project"
version = "0.1.0"
edition = "2021"

[dependencies]
serde = "1.0"    # مثال لإضافة تبعية
```
5. مثال لملف main.rs بسيط
```rust
fn main() {
    println!("مرحبًا بالعالم! 🦀");

    let numbers = vec![1, 2, 3, 4, 5];
    let sum: i32 = numbers.iter().sum();

    println!("مجموع الأرقام: {}", sum);

    let result = divide(10, 2);
    match result {
        Ok(val) => println!("نتيجة القسمة: {}", val),
        Err(e) => println!("خطأ: {}", e),
    }
}

fn divide(a: i32, b: i32) -> Result<f64, String> {
    if b == 0 {
        Err("لا يمكن القسمة على صفر".to_string())
    } else {
        Ok(a as f64 / b as f64)
    }
}
```
6. الأوامر الأساسية
```bash
# تجميع وتشغيل المشروع
cargo run

# تجميع للإصدار النهائي
cargo build --release

# تشغيل الاختبارات
cargo test

# تحديث التبعيات
cargo update

# توليد الوثائق
cargo doc --open
```
7. إعدادات التطوير المتقدمة
أ. إضافة ملف .rustfmt.toml لتنسيق الكود

In [8]:
!cargo new my_project --bin

[1m[32m    Creating[0m binary (application) `my_project` package
[1m[36mnote[0m[1m:[0m see more `Cargo.toml` keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html


In [9]:
%cd /content/my_project
!cargo run

/content/my_project
[1m[31merror[0m[1m:[0m failed to parse manifest at `/content/my_project/Cargo.toml`

Caused by:
  no targets specified in the manifest
  either src/lib.rs, src/main.rs, a [lib] section, or [[bin]] section must be present
