In [None]:
!pip install conllu pandas

Collecting conllu
  Downloading conllu-6.0.0-py3-none-any.whl.metadata (21 kB)
Downloading conllu-6.0.0-py3-none-any.whl (16 kB)
Installing collected packages: conllu
Successfully installed conllu-6.0.0


In [None]:
import os
import glob
from conllu import parse_incr
from collections import defaultdict, Counter
import pandas as pd

In [None]:
# Directory that holds the *.conllu files to analyse.
# Set the UD_DIR environment variable to point somewhere else without editing
# the notebook; when it is unset the original default path is used.
UD_DIR = os.environ.get("UD_DIR", "/content/")

In [None]:
# Results, both keyed by the treebank prefix of the file name:
#   position_counts[lang]["<category>_<position>"] -> number of matching clauses
#   examples[lang]["pre" | "post"]                 -> up to MAX_EXAMPLES sentences
MAX_EXAMPLES = 5
RELCL_DEPRELS = {"acl", "acl:relcl"}
# Universal relation of the modified head -> reported category.
HEAD_CATEGORIES = {"nsubj": "subject", "obj": "object", "obl": "oblique"}

position_counts = defaultdict(Counter)
examples = defaultdict(lambda: {"pre": [], "post": []})


def _head_category(deprel):
    """Return "subject", "object", "oblique" or "other" for a head's relation.

    Only the part of the label before the first ":" is compared, so subtyped
    relations (e.g. "nsubj:pass", "obj:lvc", "obl:tmod") are grouped with the
    bare relation. A missing label (None) is reported as "other" instead of
    raising.
    """
    universal = (deprel or "").split(":", 1)[0]
    return HEAD_CATEGORIES.get(universal, "other")


# Sorted so that row order and collected examples do not depend on the
# order in which the filesystem happens to list the files.
conllu_paths = sorted(glob.glob(os.path.join(UD_DIR, "*.conllu")))
if not conllu_paths:
    print(f"No .conllu files found in {UD_DIR!r}")

for filepath in conllu_paths:
    # Everything before the first "-" in the file name identifies the treebank.
    lang = os.path.basename(filepath).split("-")[0]
    with open(filepath, "r", encoding="utf-8") as f:
        for sent in parse_incr(f):
            # Keep only rows with an integer id. Rows with any other id type
            # (in CoNLL-U: multiword-token ranges and empty nodes) cannot be
            # compared positionally and would repeat surface forms in the text.
            id_to_token = {tok["id"]: tok for tok in sent if isinstance(tok["id"], int)}
            sentence_text = None      # built lazily, only if an example is stored
            saved_positions = set()   # positions this sentence was already stored under

            for relcl in id_to_token.values():
                if relcl["deprel"] not in RELCL_DEPRELS:
                    continue
                if not isinstance(relcl["head"], int):
                    continue
                head = id_to_token.get(relcl["head"])
                if head is None:
                    # Head id 0 (root) or an id that is not in this sentence.
                    continue

                # A clause whose id is lower than its head's id precedes it.
                position = "pre" if relcl["id"] < head["id"] else "post"
                category = _head_category(head["deprel"])
                position_counts[lang][f"{category}_{position}"] += 1

                # Store each sentence at most once per position, so a sentence
                # with several matching clauses does not fill the example list
                # with copies of itself.
                if position in saved_positions or len(examples[lang][position]) >= MAX_EXAMPLES:
                    continue
                if sentence_text is None:
                    sentence_text = " ".join(word["form"] for word in id_to_token.values())
                examples[lang][position].append(sentence_text)
                saved_positions.add(position)


In [None]:
# Summary table: one row per treebank, one column per "<category>_<position>"
# key, missing combinations filled with 0, plus a row-wise "total" column.
df = (
    pd.DataFrame(position_counts)
    .fillna(0)
    .astype(int)
    .T
)
df["total"] = df.sum(axis=1)

print("\n=== Relative Clause Position Summary ===")
print(df)

# Print the stored example sentences for every treebank, prenominal first.
for lang, by_position in examples.items():
    print(f"\n--- {lang.upper()} ---")

    print("PRENOMINAL RELATIVE CLAUSES:")
    for ex in by_position["pre"]:
        print(f"  [PRE] {ex}")

    print("POSTNOMINAL RELATIVE CLAUSES:")
    for ex in by_position["post"]:
        print(f"  [POST] {ex}")



=== Relative Clause Position Summary ===
          subject_pre  object_pre  other_pre  oblique_pre  total
tr_tuecl            3           1          1            1      6
uz_tuecl            3           0          0            2      5
az_tuecl            3           1          0            0      4
ky_tuecl            7           1          0            4     12

--- TR_TUECL ---
PRENOMINAL RELATIVE CLAUSES:
  [PRE] Yatılı kalacak misafir kapıyı çalmıştı ama ev sahibi hala temizlik yapıyordu , restorandan sipariş ettiği yemek ise daha gelecekti .
  [PRE] Yatılı kalacak misafir kapıyı çalmıştı ama ev sahibi hala temizlik yapıyordu , restorandan sipariş ettiği yemek ise daha gelecekti .
  [PRE] Çözmeye çalıştığımız sorun kitaplıkta beş kitaplık yer bile olmaması .
  [PRE] Gençlere erişilebilir olmayan hedefleri terk ettirmeliyiz .
  [PRE] Öğretmen kitabı alıp okula gitti .
POSTNOMINAL RELATIVE CLAUSES:

--- UZ_TUECL ---
PRENOMINAL RELATIVE CLAUSES:
  [PRE] Yotgani kelgan mehmon eshikni