In [6]:
import json, re, os
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from bs4 import BeautifulSoup
from urllib.request import urlopen
from sklearn.feature_extraction import DictVectorizer

try:
    from google.colab import drive
    drive.mount('/content/drive/')
    __dir__ = '/content/drive/My Drive/conditional_order/'
except ImportError:
    __dir__ = os.path.abspath(os.getcwd()) + os.sep

# Scrape the data

In [7]:
def get_cases():
    base = "https://www.justice.gov.il"
    prev_links = None
    links = []
    n=1
    ret = set()
    while prev_links != links:
        url = f"{base}/Units/StateAttorney/Criminal/Pages/Conditional-Order.aspx?WPID=WPQ8&PN={n}"
        soup = BeautifulSoup(urlopen(url).read().decode("utf8"), 'html.parser')
        prev_links = links
        links = [base + str(link.get("href")) for link in soup.findAll("a") if link.get_text()=="להרחבה"]
        ret=ret | set(links)
        n+=1
    return list(ret)

def get_case_details(url):
    soup = BeautifulSoup(urlopen(url).read().decode("utf8"), 'html.parser')
    table_body = soup.find("div", attrs={"class": "gridDivs"})
    rows = [(tr.find("div", attrs={"class": "TitleDiv"}).get_text(), tr.find("div", attrs={"class": "GridOrangContent"}).get_text(),)
            for tr in table_body.find_all('div', attrs={"class": "DataItemGrid"})]
    ret = {k.strip(": \r\n\t"): v.strip(": \r\n\t") for k,v in rows}
    return ret

In [8]:
case_links = get_cases()
print (len(case_links))

519


In [None]:
error_count = 0
with open("conditional_order.json",'w') as f:
    f.write("[\n")
    first = True
    for url in tqdm(case_links):
        if not first:
            f.write("\n,\n")
        try:
            json.dump(get_case_details(url),f, indent=4)
            first = False
        except:
            print ("Error with "+url)
            error_count +=1
            first = True
    f.write("\n]")
print (f"number of errors: {error_count}")

In [None]:
with open(__dir__ + "conditional_order.json", 'r') as f:
    conditional_order = json.load(f)
df = pd.DataFrame(conditional_order).set_index("מספר תיק")
df.head()

In [None]:
df.to_csv(__dir__ + "conditional_order.csv")

# Preprocess the data

In [0]:
if "df" not in globals():
    df = pd.read_csv(__dir__ + "conditional_order.csv")

In [0]:
seif_pattern = re.compile(r"(?:סעיף|ס')\s{0,2}[\d()אבגדהוזחטיכלמנסעפצקרשת]+")

In [31]:
df["seifim"] = df["הוראות החיקוק שפורטו בהסדר"].apply(seif_pattern.findall)
df

Unnamed: 0_level_0,תיאור העובדות המהוות עבירה שבהן הודה החשוד,הוראות החיקוק שפורטו בהסדר,נימוקים משתנים לסגירת התיק בהסדר,יחידה,תנאי ההסדר,seifim
מספר תיק,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3373/15,"בתאריך 30.1.15, החזיק החשוד, לשם מסחר, בדוכן ב...",החזקת טובין שסומנו או יובאו לישראל שלא כדין – ...,"החשוד הודה בעובדות המהוות עבירה, לחשוד אין עבר...",פרקליטות מחוז דרום (פלילי),קנס בסך 2900 ₪ והשמדת פריטים שנתפסו.,"[סעיף 60(א)(4), סעיף 61(ה), סעיף 62(ב)(3)]"
7937/15,"ביום 09/06/15 או סמוך לאחריו, מצא החשוד 12 המח...",א. גניבה - עבירה לפי סעיף 384 לחוק העונשין התש...,"היקף המרמה אינו מאוד גבוה, שכן מדובר על צ'ק אח...",פרקליטות מחוז תל-אביב (פלילי),"א. תשלום לאוצר המדינה בסך 2,000 ₪.ב. פיצוי למת...",[סעיף 384]
4625/15,החשודות הינן סייעת וגננת בגן בצפון הארץ. ביום ...,השארת ילד בלא השגחה ברשלנות – עבירה לפי סעיף 3...,החשודות הודו בעובדות המהוות עבירהלחשודות אין ע...,פרקליטות מחוז חיפה (פלילי),"פיצוי כספי להורי הילד בסך 1,000 ₪תשלום לאוצר ה...",[סעיף 361]
3613/16,"1. במועדים הרלבנטיים לתלונת המתלוננת, עבד החשו...",ניסיון למעשה מגונה – עבירה לפי סעיף 25 + 348 ...,א. החשוד הודה בעובדות המהוות עבירה.\r\nב. לחשו...,פרקליטות מחוז צפון (פלילי),"-פיצוי ע""ס 2,500 ₪ למתלוננת.\r\n-העברת מכתב הת...",[סעיף 25]
9213/13,"הנאשמת הינה בעלת כלב מסוג ""רועה בלגי"". בשלהי ח...",מעשי פזיזית ורשלנות (אי נקיטת צעדי זהירות בחיה...,הנאשמת הודתה בעובדות המהוות עברה ולקחה אחריות ...,פרקליטות מחוז תל-אביב (פלילי),"1. תשלום לאוצר המדינה בסך 800 ש""ח אשר שולם עד ...",[סעיף 338]
...,...,...,...,...,...,...
537/17,"1. ביום 28/10/16, עבד החשוד בעבודות ניקיון במת...",ניסיון למעשה מגונה – עבירה לפי ס' 348(ג) בצירו...,א. החשוד הודה בעובדות המהוות עבירה.\r\nב. לחשו...,פרקליטות מחוז צפון (פלילי),"1. פיצוי נפגעת העבירה בסך כולל של 3,600 ₪ (_3_...","[ס' 348(ג), ס' 25]"
2855/16,החשודה זייפה בקשה שהגישה לבית המשפט המחוזי ואש...,זיוף מסמך בכוונה לקבל באמצעותו דבר – עבירה לפי...,החשודה הודתה בעובדות המהוות עבירהלחשודה אין עב...,פרקליטות מחוז חיפה (פלילי),"תשלום לאוצר המדינה בסך 1,500 ₪, אשר ישולם עד ל...","[סעיף 418, סעיף 420]"
2263/18,"החשודות הן מנהלת גן (להלן: ""חשודה 1"") ואחת מהס...",הזנחת ילד בלא השגחה ראויה - עבירה לפי סעיף 361...,א. החשודות הודו בעובדות המהוות עבירה והביעו חר...,פרקליטות מחוז ירושלים (פלילי),"חשודה 1-\r\nא.\tתכנית של""צ בהיקף של 45 שעות, ל...",[סעיף 361]
7949/15,בתקופה הרלוונטית שימש החשוד כנהג אוטובוס בחברת...,השארת ילד בלא השגחה – עבירה לפי סעיף 361 לחוק ...,החשוד הודה בעובדות המהוות עבירה; לחשוד אין עבר...,פרקליטות מחוז דרום (פלילי),"פיצוי בסך 7,000 ₪.",[סעיף 361]


In [32]:
df[df["seifim"].apply(lambda s: s==[])]

Unnamed: 0_level_0,תיאור העובדות המהוות עבירה שבהן הודה החשוד,הוראות החיקוק שפורטו בהסדר,נימוקים משתנים לסגירת התיק בהסדר,יחידה,תנאי ההסדר,seifim
מספר תיק,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4269/15,אירוע ראשון- נ.כ. קטינה ילידת 27.1.1998 (להלן:...,"מעשים מגונים- 348(ג) לחוק העונשין, התשל""ז-1977.",החשוד הודה בעובדות המהוות עבירהלחשוד אין עבר פ...,פרקליטות מחוז דרום (פלילי),"תשלום פיצוי בסך 15,000 ₪ לכל אחת מהמתלוננותמכת...",[]
59/14,"הנאשמת, בהיותה מנהלת בכירה בארגון ביטחוני, היי...",סעיפים 248 ו-415 לחוק העונשין.,"הנאשמת, כבת חמישים, ללא עבר פלילי אשר שירתה שי...",פרקליטות המדינה - המחלקה הכלכלית,"1. קנס של 29,200 ש""ח.\n2. הודייה במסגרת תובענה...",[]
7331/15,"החשוד התרשל בשמירה על כלבו, ותוך כדי טיול עמו ...",אי נקיטת אמצעי זהירות בחיה – 338(א)(6) לחוק הע...,א. החשוד הודה בעובדות המהוות עבירה.\r\nב. לחשו...,פרקליטות מחוז מרכז (פלילי),"פיצוי בסך 5,000 ₪ למתלוננת.",[]
1162/17,במהלך שנת 2017 במס' מועדים שאינם ידועים במדויק...,החזקה ושימוש בסם לצריכה עצמית- עבירה לפי סעיפי...,א. החשוד הודה בעובדות המהוות עבירה.\r\nב. לחשו...,פרקליטות המדינה - המחלקה לחקירות שוטרים,א. הצהרת החשוד לפיה הוא מתחייב להימנע מביצוע ע...,[]
105778/12,החשודה עבדה כקופאית בסופרפארם וגנבה בשנת 2011 ...,גניבה – עבירה לפי סעיפים 383+384 לחוק העונשין...,1. החשודה הודתה בעובדות המהוות עבירה.\r\n2. לח...,פרקליטות מחוז חיפה (פלילי),תשלום בסך של 3000 ₪ לאוצר המדינה.,[]
303/18,"בתקופה הרלוונטית לכתב העובדות, שירת הנאשם כשוט...",החזקה ושימוש בסם לצריכה עצמית- עבירה לפי סעיפי...,א. החשוד הודה בעובדות המהוות עבירה.\r\nב. לחשו...,פרקליטות המדינה - המחלקה לחקירות שוטרים,א. הצהרת החשוד לפיה הוא מתחייב להימנע מביצוע ע...,[]
5790/16,החשוד צילם את רגלי ופלג גופה התחתון של המתלוננ...,הטרדה מינית – עבירה לפי סעיפים 3(א)(5) + 5(א) ...,א. החשוד הודה בעובדות המהוות עבירה.ב. לחשוד אי...,פרקליטות מחוז מרכז (פלילי),"1. 4,000 ₪ פיצוי לנפגעת העבירה.2. עמידה בתוכני...",[]
5106/17,בתאריך 10/11/17 בשעה 07:50 או בסמוך לכך ליוותה...,השארת ילד בלא השגחה או במטרה לנטוש עבירה לפי ס...,1. החשודה הודתה בעובדות המהוות עבירה וכן הודתה...,פרקליטות מחוז חיפה (פלילי),"תשלום פיצוי עבור הקטין באמצעות הוריו בסך 2,000 .₪",[]
1391/17,"החשוד, נגע בישבנה של המתלוננת ידידתו, עת שהו ל...","מעשה מגונה – עבירה לפי 348(ג) לחוק העונשין, הת...",א. החשוד הודה בעובדות המהוות עבירה.ב. לחשוד אי...,פרקליטות מחוז מרכז (פלילי),"1. 3,000 ₪ פיצוי לנפגעת העבירה2. מכתב התנצלות.",[]
5580/17,"הדליק ברשלנות בחצר ביתו מעשנת מחוברת למיכל גז,...",מעשה פזיזות ורשלנות – עבירה לפי 338 (א)(3)לחוק...,א. החשוד הודה בעובדות המהוות עבירה.ב. לחשוד אי...,פרקליטות מחוז מרכז (פלילי),"3,000 ₪ קנס לאוצר המדינה.",[]


In [34]:
df["seifim"].apply(lambda s: s==[]).value_counts(normalize=True)

False    492
True      15
Name: seifim, dtype: int64

In [44]:
X = df["seifim"].apply(lambda seifim: {s.strip("סעיף' "):1 for s in seifim })
vectorizer = DictVectorizer(sparse=False)
vectorizer.fit(X)
df_seifim = pd.DataFrame(vectorizer.transform(X), columns=vectorizer.feature_names_)
df_seifim

Unnamed: 0,11(א),144(א),151,163(2),17,173(2),191,192,194(ב),2,2(1),2(1)(2),205ג(א),214,214(ב3),216(א),225,228,237,24(א)(1)(ד),243,244,25,267(א),273,275,283(2),284,286,288א(1),29,29(ג)(3),3,3(א),3(א)(2),3(א)(3,3(א)(3),3(א)(4),3(א)(5),3(א)(6)(א),...,4,400,413,414,415,416,418,420,425,430,441,447(א),47(א),499,499(א)(2),499(א)(2)לחוק,5,5(א),50,500(8),6(א)3,60,60(א)(3,60(א)(3),60(א)(4),60(א)(4)לפקודת,61,61(ה),62(2),62(ב)(1,62(ב)(3),7,7(א),7(ג),70(ג),75,98(ג),3(א)(4).1,338(א)(6),בירה
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
502,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
503,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
504,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
505,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [56]:
df_conditional_order = df.reset_index()\
  .join(df_seifim)\
  .rename(columns={"תיאור העובדות המהוות עבירה שבהן הודה החשוד": "txt", "מספר תיק":"id"})\
  .set_index("id")\
  .drop(["הוראות החיקוק שפורטו בהסדר", "נימוקים משתנים לסגירת התיק בהסדר", "תנאי ההסדר", "seifim", "יחידה"],axis=1)
df_conditional_order.head()

Unnamed: 0_level_0,txt,11(א),144(א),151,163(2),17,173(2),191,192,194(ב),2,2(1),2(1)(2),205ג(א),214,214(ב3),216(א),225,228,237,24(א)(1)(ד),243,244,25,267(א),273,275,283(2),284,286,288א(1),29,29(ג)(3),3,3(א),3(א)(2),3(א)(3,3(א)(3),3(א)(4),3(א)(5),...,4,400,413,414,415,416,418,420,425,430,441,447(א),47(א),499,499(א)(2),499(א)(2)לחוק,5,5(א),50,500(8),6(א)3,60,60(א)(3,60(א)(3),60(א)(4),60(א)(4)לפקודת,61,61(ה),62(2),62(ב)(1,62(ב)(3),7,7(א),7(ג),70(ג),75,98(ג),3(א)(4),338(א)(6),בירה
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
3373/15,"בתאריך 30.1.15, החזיק החשוד, לשם מסחר, בדוכן ב...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7937/15,"ביום 09/06/15 או סמוך לאחריו, מצא החשוד 12 המח...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4625/15,החשודות הינן סייעת וגננת בגן בצפון הארץ. ביום ...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3613/16,"1. במועדים הרלבנטיים לתלונת המתלוננת, עבד החשו...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9213/13,"הנאשמת הינה בעלת כלב מסוג ""רועה בלגי"". בשלהי ח...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [0]:
df_conditional_order.to_csv(__dir__ + "conditional_order_seifim.csv")

# Model

In [None]:
if "df_conditional_order" not in globals():
    df_conditional_order = pd.read_csv(__dir__ + "conditional_order_seifim.csv")

In [None]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer