Create an empty Data frame with date index:
import pandas as pd
def test_run():
start_date='2017-11-24'
end_data='2017-11-28'
dates=pd.date_range(start_date, end_data)
df1=pd.DataFrame(index=dates)
print(df1)
"""
Empty DataFrame
Columns: []
Index: [2010-01-22 00:00:00, 2010-01-23 00:00:00, 2010-01-24 00:00:00, 2010-01-25 00:00:00, 2010-01-26 00:00:00]
"""
Now we want to load SPY.csv and get 'Adj Close' column value and copy the range (11-21, 11-28) data to the empty data frame:
import pandas as pd
def test_run():
start_date='2017-11-24'
end_data='2017-11-28'
dates=pd.date_range(start_date, end_data)
# Create an empty data frame
df1=pd.DataFrame(index=dates)
# Load csv file
dspy=pd.read\_csv('data/SPY.csv',
index\_col="Date",
parse\_dates=True,
usecols=\['Date', 'Adj Close'\],
na\_values=\['nan'\])
# print(dspy)
"""
Adj Close
Date
2017-11-16 258.619995
2017-11-17 257.859985
2017-11-20 258.299988
"""
# join the table
df1=df1.join(dspy)
#print(df1)
"""
Adj Close
2017-11-24 260.359985
2017-11-25 NaN
2017-11-26 NaN
2017-11-27 260.230011
"""
# drop the nan row
df1=df1.dropna()
print(df1)
"""
Adj Close
2017-11-24 260.359985
2017-11-27 260.230011
2017-11-28 262.869995
"""
if __name__ == '__main__':
test_run()
There is a simpy way to drop the data which index is not present in dspy:
df1=df1.join(dspy, how='inner')
We can also rename the 'Adj Close' to prevent conflicts:
# rename the column
dspy=dspy.rename(columns={'Adj Close': 'SPY'})
Load more stocks:
import pandas as pd
def test_run():
start_date='2017-11-24'
end_data='2017-11-28'
dates=pd.date_range(start_date, end_data)
# Create an empty data frame
df1=pd.DataFrame(index=dates)
# Load csv file
dspy=pd.read\_csv('data/spy.csv',
index\_col="Date",
parse\_dates=True,
usecols=\['Date', 'Adj Close'\],
na\_values=\['nan'\])
# print(dspy)
"""
Adj Close
Date
2017-11-16 258.619995
2017-11-17 257.859985
2017-11-20 258.299988
"""
# rename the column
dspy=dspy.rename(columns={'Adj Close': 'spy'})
# join the table
df1=df1.join(dspy, how='inner')
# print(df1)
"""
Adj Close
2017-11-24 260.359985
2017-11-27 260.230011
2017-11-28 262.869995
"""
symbols=\['aapl', 'ibm'\]
for symbol in symbols:
temp=pd.read\_csv('data/{0}.csv'.format(symbol), index\_col="Date", parse\_dates=True, usecols=\['Date', 'Adj Close'\], na\_values=\['nan'\])
temp=temp.rename(columns={'Adj Close': symbol})
df1=df1.join(temp)
print(df1)
"""
spy aapl ibm
2017-11-24 260.359985 174.970001 151.839996
2017-11-27 260.230011 174.089996 151.979996
2017-11-28 262.869995 173.070007 152.470001
"""
if __name__ == '__main__':
test_run()
手机扫一扫
移动阅读更方便
你可能感兴趣的文章