Commit e40ed067 authored by Christopher Barnett
Browse files

changed ingest to support ingest of "personae"

parent cd919aa6
.idea/
node_modules/
/data/npm-debug.log
/ingest/old_wpf_arms_deals.csv
/data/old_*
/ingest/old_*
/ingest/~$*.xlsx
This diff is collapsed.
......@@ -201,11 +201,13 @@ def convert_countries(json_data):
for i in buyers:
buyers_.append(i.strip())
buyers = buyers_
sellers = row['sellers'].split(';')
sellers_ = []
for i in sellers:
sellers_.append(i.strip())
sellers = sellers_
transactions = []
for b in buyers:
for s in sellers:
......@@ -228,3 +230,7 @@ class CountryProcessor:
except LookupError:
print('Could not find country: {}'.format(country))
return None
def persona_processor_xl(json_data):
    """Processor hook for the personae sheet: return the data unchanged.

    Passed as ``df_processor`` when the personae sheet is ingested. No
    transformation is applied; the very same object is handed back.

    :param json_data: data produced for the personae sheet.
    :return: ``json_data`` itself (same object, not a copy).
    """
    return json_data
from ingest.wpf_ingest.ingest_data import ExcelToJSON, deals_processor_xl, JSONProcessor, convert_countries
from ingest.wpf_ingest.ingest_data import ExcelToJSON, deals_processor_xl, JSONProcessor, convert_countries, \
persona_processor_xl
# JSON file the deals sheet is written to (used as json_path for the deals ingest).
DEALS_PATH = 'data/wpf_deals.json'
# JSON file the personae sheet is written to (used as json_path in ingest_personae).
PERSONAE_PATH = 'data/personae.json'
# Default Excel workbook; used as the default `path` of the ingest functions.
EXCEL_PATH = 'ingest/wpf_arms_deals.xlsx'
'''
usage:
from ingest.wpf_ingest.ingest_from_excel import *
ingest_excel()
ingest_deals()
'''
def ingest_excel(path=EXCEL_PATH):
    """Construct the Excel-to-JSON converter for the deals workbook.

    NOTE(review): as written this only builds the converter; nothing is
    written or returned here. It looks like the pre-rename form of
    ``ingest_deals`` -- confirm whether it is still needed.

    :param path: Excel workbook to read; defaults to ``EXCEL_PATH``.
    """
    converter = ExcelToJSON(
        excel_path=path,
        json_path=DEALS_PATH,
        df_processor=deals_processor_xl,
    )
def ingest_deals(path=EXCEL_PATH):
    """Ingest the deals sheet, then the personae sheet, then post-process the deals JSON.

    Steps, in order:
      1. convert sheet index 0 of the workbook with ``deals_processor_xl``
         and write it to ``DEALS_PATH``;
      2. ingest the personae, linking them against the deals data just read;
      3. run ``__wpf_process`` over the written deals JSON file.

    :param path: Excel workbook to read; defaults to ``EXCEL_PATH``.
    """
    ing = ExcelToJSON(excel_path=path, json_path=DEALS_PATH, sheet=0, df_processor=deals_processor_xl)
    ing.write_json()

    # Forward `path` so the personae are read from the same workbook as the
    # deals; previously a custom path was silently ignored for this step.
    ingest_personae(path=path, deals_json=ing.get_data())

    jp = JSONProcessor(json_path=ing.json_path)
    jp.process(__wpf_process)
......@@ -28,3 +31,49 @@ def __wpf_process(json_data):
for e in equip:
equip_.append(e.strip())
row['equipment'] = equip_
def _clean_details(value):
    """Normalise one ``details_*`` cell; return the text to store, or None to skip it."""
    if isinstance(value, bool):
        # Boolean cells carry no text but still count as a connection.
        # NOTE(review): this includes False, exactly as before -- confirm intent.
        return ''
    if value is None or (isinstance(value, float) and value != value):
        # Empty cell: None, or NaN (the only float that is unequal to itself).
        return None
    text = value.strip() if isinstance(value, str) else str(value).strip()
    return text or None


def ingest_personae(path=EXCEL_PATH, deals_json=None):
    """Ingest the personae sheet and attach deal connections to every persona.

    Each row of sheet index 1 may carry columns named ``details_<series_id>``.
    Every such column is removed from the row. When its cell is a boolean or
    non-blank text, and a deal with that ``series_id`` exists in
    ``deals_json``, an entry ``{'blog_link': ..., 'details': ...}`` is added
    to the row's ``connections`` list. The result is written to
    ``PERSONAE_PATH`` through the converter's ``write_json``.

    :param path: Excel workbook to read; defaults to ``EXCEL_PATH``.
    :param deals_json: deals data shaped like ``{'data': [row, ...]}``. When
        None there is nothing to link against, so every persona gets an
        empty ``connections`` list instead of the ingest crashing.
    :return: the string ``'finished!'``.
    :raises KeyError: if a matched deal row has no ``'blog_link'`` entry.
    """
    ing = ExcelToJSON(excel_path=path, json_path=PERSONAE_PATH, sheet=1, df_processor=persona_processor_xl)
    prefix = 'details_'

    # Rows are mutated in place; assumes write_json() serialises this same
    # data object -- TODO confirm against ExcelToJSON.
    for row in ing.get_data()['data']:
        connections = []
        # Iterate over a snapshot of the keys: matching columns are removed
        # from the row while looping.
        for key in list(row):
            if not (isinstance(key, str) and key.startswith(prefix)):
                continue
            details = _clean_details(row.pop(key))
            if details is None or deals_json is None:
                continue
            match = find_deal_by(deals_json, 'series_id', key[len(prefix):])
            if match is not None:
                connections.append({
                    'blog_link': match['blog_link'],
                    'details': details,
                })
        row['connections'] = connections

    ing.write_json()
    return 'finished!'
def find_deal_by(deals_json, key, value):
    """Return the first deal row whose ``key`` field matches ``value``, or None.

    Both sides are compared as whitespace-stripped strings, so ``12``,
    ``'12'`` and ``' 12 '`` all match each other.

    :param deals_json: deals data shaped like ``{'data': [row, ...]}``;
        None is treated as "no deals".
    :param key: name of the row field to compare.
    :param value: value to look for.
    :return: the first matching row, or None when nothing matches.
    :raises KeyError: if ``deals_json`` has no ``'data'`` entry.
    """
    if deals_json is None:
        return None

    # Loop-invariant: normalise the wanted value once.
    wanted = str(value).strip()
    for row in deals_json['data']:
        # Rows without the field cannot match; skip them rather than abort
        # the whole lookup with a KeyError.
        if key in row and str(row[key]).strip() == wanted:
            return row
    return None
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment