In [1]:
import datetime
import os

import numpy as np
import pandas as pd

### Rename columns

In [2]:
def normalize_columns(frame):
    """Return *frame* with the canonical column names and column order.

    Header names are stripped of surrounding whitespace before renaming, so
    both ``' Close/Last'`` and ``'Close/Last'`` end up as ``'Close'``. A frame
    that already has the canonical names passes through unchanged, which keeps
    this cell safe to re-run on files it has already rewritten.

    Parameters
    ----------
    frame : pandas.DataFrame
        Raw frame as read from one CSV file.

    Returns
    -------
    pandas.DataFrame
        New frame with exactly the columns Date, Open, High, Low, Close,
        Volume, in that order. The input frame is not modified.

    Raises
    ------
    KeyError
        If any of the six expected columns is still missing after renaming.
    """
    # str() guards against non-string header labels before stripping.
    tidy = frame.rename(columns=lambda name: str(name).strip())
    tidy = tidy.rename(columns={'Close/Last': 'Close'})
    return tidy[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]


# Rewrite every CSV in the working directory in place with normalized headers.
for csv_name in os.listdir('.'):
    if csv_name.endswith('.csv'):
        normalize_columns(pd.read_csv(csv_name)).to_csv(csv_name, index=False)

#### Remove dollar sign

In [3]:
# For every CSV in the working directory: remove every character that is not
# a digit or a dot from the columns at positions 1-4, convert them to float,
# and write the file back in place.
# Note: the pattern also removes whitespace and minus signs, and a cell with
# no digits becomes '' (which astype(float) rejects with ValueError).
csv_names = [entry for entry in os.listdir('.') if entry.endswith('.csv')]
for csv_name in csv_names:
    frame = pd.read_csv(csv_name, parse_dates=[0])
    price_cols = frame.columns[1:5]
    digits_only = frame[price_cols].replace(to_replace='[^.0-9]', value='', regex=True)
    frame[price_cols] = digits_only.astype(float)
    frame.to_csv(csv_name, index=False)

#### If you hit *ValueError: could not convert string to float*, try individual files with this cell

In [None]:
# Single-file version of the currency clean-up: strip every character that is
# not a digit or a dot from the columns at positions 1-4, cast to float, and
# overwrite the file.
target_csv = 'ABMD.csv'
data = pd.read_csv(target_csv, parse_dates=[0])
price_cols = data.columns[1:5]
digits_only = data[price_cols].replace(to_replace='[^.0-9]', value='', regex=True)
data[price_cols] = digits_only.astype(float)
data.to_csv(target_csv, index=False)

### Try removing whitespace in the columns

In [None]:
# For every CSV in the working directory: trim surrounding whitespace from the
# four price columns, then strip non-numeric characters from the columns at
# positions 1-4, cast them to float, and write the file back in place.
# The .str accessor requires string-valued columns, so this raises
# AttributeError on columns that are already numeric.
for csv_name in (entry for entry in os.listdir('.') if entry.endswith('.csv')):
    frame = pd.read_csv(csv_name, parse_dates=[0])
    for price_col in ('Open', 'High', 'Low', 'Close'):
        frame[price_col] = frame[price_col].str.strip()
    numeric_cols = frame.columns[1:5]
    digits_only = frame[numeric_cols].replace(to_replace='[^.0-9]', value='', regex=True)
    frame[numeric_cols] = digits_only.astype(float)
    frame.to_csv(csv_name, index=False)

### Remove whitespace for an individual file

In [None]:
# Single-file version of the whitespace clean-up: trim surrounding whitespace
# from the four price columns and overwrite the file. No float conversion is
# done here.
target_csv = 'REGN.csv'
data = pd.read_csv(target_csv, parse_dates=[0])
for price_col in ('Open', 'High', 'Low', 'Close'):
    data[price_col] = data[price_col].str.strip()
data.to_csv(target_csv, index=False)

### Reverse Sort (oldest date on top)

In [4]:
def sort_oldest_first(frame):
    """Return *frame* ordered by its ``Date`` column, oldest row first.

    The previous implementation simply reversed the row order, so running the
    cell twice put the newest date back on top. Sorting by the parsed date
    gives the same result for a newest-first file and is safe to re-run.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame with a ``Date`` column that ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        New frame with a fresh 0..n-1 index. The ``Date`` values themselves
        are left exactly as they were read; parsing is only used for ordering.
    """
    # Positional sort keys; reset_index makes the resulting index usable with iloc.
    keys = pd.to_datetime(frame['Date']).reset_index(drop=True)
    # mergesort is stable, so rows sharing a date keep their relative order.
    order = keys.sort_values(kind='mergesort').index.tolist()
    return frame.iloc[order].reset_index(drop=True)


# Rewrite every CSV in the working directory in place, oldest date on top.
for csv_name in os.listdir('.'):
    if csv_name.endswith('.csv'):
        sort_oldest_first(pd.read_csv(csv_name)).to_csv(csv_name, index=False)

### Remove index column
Only necessary if the files were written with `index=True`:

data.to_csv(file, index=**True**)

In [None]:
def keep_ohlcv_columns(frame):
    """Return only the Date, Open, High, Low, Close, Volume columns of *frame*.

    Selecting these six columns discards everything else in one step,
    including a ``Symbol`` column and any unnamed index column, so no
    separate ``drop`` is needed.

    Raises KeyError if any of the six columns is missing. The input frame is
    not modified.
    """
    return frame[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]


# Rewrite every CSV in the working directory in place. Files are read from the
# same directory that is listed ('.'); the earlier `file_location` prefix was
# never defined and made this cell fail with NameError.
for file in os.listdir('.'):
    if file.endswith('.csv'):
        keep_ohlcv_columns(pd.read_csv(file)).to_csv(file, index=False)

### Fix date (remove timestamp from datetime)

In [5]:
# For every CSV in the working directory: parse the Date column, reduce each
# value to a plain calendar date (no time-of-day part), and write the file
# back in place.
csv_names = [entry for entry in os.listdir('.') if entry.endswith('.csv')]
for csv_name in csv_names:
    frame = pd.read_csv(csv_name, parse_dates=[0])
    frame['Date'] = pd.to_datetime(frame['Date']).dt.date
    frame.to_csv(csv_name, index=False, date_format='%Y-%m-%d')

#### Only grab desired columns

In [None]:
# Column names are passed explicitly; otherwise read_csv would treat the
# first row of the file as the header.
cols = ['Symbol', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume']
data = pd.read_csv('NASDAQ_20191230.txt', names=cols)

# Keep only the AMRN rows, and of those the columns from Date through Volume
# (label slices in .loc include both ends).
is_amrn = data['Symbol'] == 'AMRN'
daydata = data.loc[is_amrn, 'Date':'Volume']
daydata

### Replace 0 with NaN, then remove those rows (holidays)

In [None]:
def drop_zero_rows(frame):
    """Return *frame* without any row that contains a 0 or a missing value.

    Every 0 is first replaced by NaN, then every row with at least one NaN is
    dropped. This removes rows where any column is 0, not only rows where all
    columns are 0.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame as read from one CSV file.

    Returns
    -------
    pandas.DataFrame
        New frame; the input is not modified. Integer columns that contained
        a 0 come back as float because NaN was inserted into them.
    """
    return frame.replace(0, np.nan).dropna(how='any')


# Rewrite every CSV in the working directory in place without the zero rows.
# Requires numpy to be imported as `np` in the notebook's import cell.
for csv_name in os.listdir('.'):
    if csv_name.endswith('.csv'):
        cleaned = drop_zero_rows(pd.read_csv(csv_name, parse_dates=[0]))
        cleaned.to_csv(csv_name, index=False, date_format='%Y-%m-%d')