Skip to content

Commit

Permalink
cleanup and added ipynb to scripts folder
Browse files Browse the repository at this point in the history
  • Loading branch information
wesleybeckner authored and wesleybeckner committed Nov 2, 2018
1 parent aa8d34c commit 0824ad8
Show file tree
Hide file tree
Showing 30 changed files with 4,319 additions and 3,546 deletions.
Binary file added .DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
scripts/development
# ignore all jupyter notebooks for now?
scripts/*.ipynb
# scripts/*.ipynb

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
6 changes: 6 additions & 0 deletions .pytest_cache/v/cache/lastfailed
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"salty/tests/test_data_manipulation.py": true,
"salty/tests/test_iupac_smiles.py": true,
"salty/tests/test_visualization_library.py::visualization_library_tests::test_1_parity_plot": true,
"salty/tests/test_visualization_library.py::visualization_library_tests::test_benchmark": true
}
4 changes: 4 additions & 0 deletions .pytest_cache/v/cache/nodeids
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[
"salty/tests/test_visualization_library.py::visualization_library_tests::test_1_parity_plot",
"salty/tests/test_visualization_library.py::visualization_library_tests::test_benchmark"
]
101 changes: 0 additions & 101 deletions examples/salty_blog.ipynb

This file was deleted.

289 changes: 0 additions & 289 deletions examples/salty_multi-output.ipynb

This file was deleted.

33 changes: 0 additions & 33 deletions examples/salty_web_scraping.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -469,39 +469,6 @@
"# new_df.dropna(inplace=True) #remove entries not in smiles database"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"ename": "UnboundLocalError",
"evalue": "local variable 'target_lookup' referenced before assignment",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/home/wesley/anaconda3/envs/py36/lib/python3.6/site-packages/salty_ilthermo-0.2.dev1-py3.6.egg/salty/salty.py\u001b[0m in \u001b[0;36mcheck_name\u001b[0;34m(user_query, index)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0mtarget_lookup\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumn_index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrow_index\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 70\u001b[0;31m \u001b[0m_look_up_info_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_cation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 71\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mBaseException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/wesley/anaconda3/envs/py36/lib/python3.6/site-packages/salty_ilthermo-0.2.dev1-py3.6.egg/salty/salty.py\u001b[0m in \u001b[0;36m_look_up_info_file\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0minput_type\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0muser_query\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m \u001b[0mcolumn_index\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_type\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 65\u001b[0m \u001b[0mrow_index\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0muser_query\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mIndexError\u001b[0m: index 0 is out of bounds for axis 0 with size 0",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/home/wesley/anaconda3/envs/py36/lib/python3.6/site-packages/salty_ilthermo-0.2.dev1-py3.6.egg/salty/salty.py\u001b[0m in \u001b[0;36mcheck_name\u001b[0;34m(user_query, index)\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0mtarget_lookup\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumn_index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrow_index\u001b[0m \u001b[0;34m=\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 74\u001b[0;31m \u001b[0m_look_up_info_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_anion\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 75\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mBaseException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/home/wesley/anaconda3/envs/py36/lib/python3.6/site-packages/salty_ilthermo-0.2.dev1-py3.6.egg/salty/salty.py\u001b[0m in \u001b[0;36m_look_up_info_file\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0minput_type\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0muser_query\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 64\u001b[0;31m \u001b[0mcolumn_index\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_type\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 65\u001b[0m \u001b[0mrow_index\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0muser_query\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mIndexError\u001b[0m: index 0 is out of bounds for axis 0 with size 0",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mUnboundLocalError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-13-abc66298e215>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msalty\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mcheck_name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mcheck_name\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"3-(3-aminopropyl)-1-methyl-1H-imidazolium\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/home/wesley/anaconda3/envs/py36/lib/python3.6/site-packages/salty_ilthermo-0.2.dev1-py3.6.egg/salty/salty.py\u001b[0m in \u001b[0;36mcheck_name\u001b[0;34m(user_query, index)\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0m_look_up_info_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf_anion\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mBaseException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 76\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"query %s not found\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mtarget_lookup\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 77\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 78\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcolumn_index\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mUnboundLocalError\u001b[0m: local variable 'target_lookup' referenced before assignment"
]
}
],
"source": [
"from salty import check_name\n",
"check_name(\"3-(3-aminopropyl)-1-methyl-1H-imidazolium\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
Expand Down
65 changes: 39 additions & 26 deletions salty/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def assign_category(salts):
else:
category.append("Other")
missed.append(salts[label].iloc[i])
print("ILs labeled as other: {}\n{}".format(len(missed),missed))
print("ILs labeled as other: {}\n{}".format(len(missed), missed))
salts["category"] = category
return salts

Expand Down Expand Up @@ -108,44 +108,54 @@ def merge_duplicates(model_name, keep_descriptors=False):
if (devmodel.Data.iloc[:, -(4 + model_outputs):-4].max() < 700).all():
for output_index in range(model_outputs):
devmodel.Data.iloc[:, -(5 + output_index)] = \
devmodel.Data.iloc[:, -(5 + output_index)].apply(
lambda x: exp(float(x)))
devmodel.Data.iloc[:, -(5 + output_index)].apply(
lambda x: exp(float(x)))
output_val = pd.DataFrame()
output_xtd = pd.DataFrame()
for output_index in range(model_outputs):
val = devmodel.Data.groupby(['smiles-cation', 'smiles-anion']
)[cols[-(5 + output_index)]].mean().reset_index()
)[cols[-(5 + output_index)]].mean().\
reset_index()
xtd = devmodel.Data.groupby(['smiles-cation', 'smiles-anion']
)[cols[-(5 + output_index)]].std().reset_index()
)[cols[-(5 + output_index)]].std().\
reset_index()
if output_index == 0:
output_val = val
output_xtd = xtd
else:
output_val = pd.merge(output_val,val)
output_xtd = pd.merge(output_xtd,xtd)
output_val = pd.merge(output_val, val)
output_xtd = pd.merge(output_xtd, xtd)
size = devmodel.Data.groupby(['smiles-cation', 'smiles-anion']
)[cols[-(5 + output_index)]].count().reset_index()
)[cols[-(5 + output_index)]].count().\
reset_index()
cations = devmodel.Data.groupby(['smiles-cation', 'smiles-anion']
)['name-cation'].first().reset_index()
)['name-cation'].first().reset_index()
anions = devmodel.Data.groupby(['smiles-cation', 'smiles-anion']
)['name-anion'].first().reset_index()
)['name-anion'].first().reset_index()

size.columns.values[2] = "count"

salts = (devmodel.Data["smiles-cation"] + "." +
devmodel.Data["smiles-anion"]).unique() # grab unique salts
print("Identified {} unique salts in {} datapoints".format(len(salts),devmodel.Data.shape[0]))
out = pd.merge(output_val,output_xtd,on=['smiles-cation','smiles-anion'],suffixes=['_mean' , '_std'])
out = pd.merge(out,size)
out = pd.merge(out,cations)
out = pd.merge(out,anions)
salts = (devmodel.Data["smiles-cation"] + "." + devmodel.
Data["smiles-anion"]).unique()
print("Identified {} unique salts in {} datapoints".
format(len(salts), devmodel.Data.shape[0]))
out = pd.merge(output_val, output_xtd,
on=['smiles-cation', 'smiles-anion'],
suffixes=['_mean', '_std'])
out = pd.merge(out, size)
out = pd.merge(out, cations)
out = pd.merge(out, anions)
if keep_descriptors:
cationDescriptors = load_data("cationDescriptors.csv")
cationDescriptors.columns = [str(col) + '-cation' for col in cationDescriptors.columns]
cationDescriptors.columns = [str(col) + '-cation' for
col in cationDescriptors.columns]
anionDescriptors = load_data("anionDescriptors.csv")
anionDescriptors.columns = [str(col) + '-anion' for col in anionDescriptors.columns]
new_df = pd.merge(cationDescriptors, out, on=["name-cation","smiles-cation"], how="right")
new_df = pd.merge(anionDescriptors, new_df, on=["name-anion","smiles-anion"], how="right")
anionDescriptors.columns = [str(col) + '-anion' for
col in anionDescriptors.columns]
new_df = pd.merge(cationDescriptors, out,
on=["name-cation", "smiles-cation"], how="right")
new_df = pd.merge(anionDescriptors, new_df,
on=["name-anion", "smiles-anion"], how="right")
out = new_df
return out

Expand Down Expand Up @@ -275,12 +285,15 @@ def aggregate_data(data, T=[0, inf], P=[0, inf], data_ranges=None,
if scale_center:
for i in range(1, len(data) + 1):
dataDf.is_copy = False
dataDf.iloc[:, -i] = dataDf.iloc[:, -i].apply(lambda x: log(float(x)))
scaled_data = pd.DataFrame(instance.fit_transform(
dataDf.iloc[:, :-len(data)]), columns=cols[:-len(data)])
df = pd.concat([scaled_data, dataDf.iloc[:, -len(data):], metaDf], axis=1)
dataDf.iloc[:, -i] = dataDf.iloc[:, -i].apply(lambda x:
log(float(x)))
scaled_data = pd.DataFrame(instance.
fit_transform(dataDf.iloc[:, :-len(data)]),
columns=cols[:-len(data)])
df = pd.concat([scaled_data, dataDf.iloc[:, -len(data):], metaDf],
axis=1)
mean_std_of_coeffs = pd.DataFrame([instance.mean_, instance.scale_],
columns=cols[:-len(data)])
columns=cols[:-len(data)])
else:
instance.fit(dataDf.iloc[:, :-len(data)])
df = pd.concat([dataDf, metaDf], axis=1)
Expand Down

0 comments on commit 0824ad8

Please sign in to comment.