問題描述
使用 pymongo 用 CSV 數據更新 mongodb (update mongodb with CSV data using pymongo)
我正在嘗試使用 csv 文件中的數據更新 mongodb,我的 csv 文件包含 Ticket‑ID、類型、描述等列,如果我想要的集合中已經存在具有相同“Ticket‑ID”的文檔要更新該文檔中“Ticket‑ID”的其他列,或者我想插入一個新文檔,我嘗試了以下代碼
def fix_dict(self,data, ignore_duplicate_key=True):
if isinstance(data, (list, tuple)):
list2 = list()
for e in data:
list2.append(self.fix_dict(e))
# end if
return list2
if isinstance(data, dict):
# end if
for key, value in data.items():
value = self.fix_dict(value)
old_key = key
if "." in key:
key = old_key.replace(".", "")
if key not in data:
data[key] = value
else:
error_msg = "Dict key {key} containing a \".\" was ignored, as {replacement} already exists".format(
key=key_old, replacement=key)
if force:
import warnings
warnings.warn(error_msg, category=RuntimeWarning)
else:
raise ValueError(error_msg)
# end if
# end if
del data[old_key]
# end if
data[key] = value
# end for
return data
# end if
return data
# end def
def import_content(self,filename, Database, Collection):
filepath = os.path.join(CurrentFilePath, filename)
mongo_client = pymongo.MongoClient()
mongo_db = mongo_client[Database]
db_cm = mongo_db[Collection]
data = pd.read_csv(filepath,encoding='utf‑8‑sig')
datajson = data.to_json(orient='records', force_ascii = False)
data_json = json.loads(datajson)
updatedata = self.fix_dict(data_json)
for d in updatedata:
print d['Ticket‑ID']
db_cm.update_many({'Ticket‑ID' : d['Ticket‑ID']}, updatedata , upsert = True)
print "Data Inserted Successfully"
我的更新數據包含以下數據:
>>print updatedata
>>[{u'Status': u'Closed', u'Lastname+': u'xxxx', u'Assigned To Individual': u'xxxxx', u'StatusAttribute': None, u'Modification Date': u'2016/10/31 17:24:30', u'Creation Date': u'2016/10/31 16:37:40', u'Ticket‑ID': 3529, u'Firstname+': u'yyyy', u'Priority': 3, u'Short Description+': u'yyyyyyyyyyyyyy', u'Ticket‑Typ': u'Incident', u'Department': u'aaa', u'Remark 2': u'sdf', u'Assigned To Group': u'xyz'}, {u'Status': u'Closed', u'Lastname+': u'abc', u'Assigned To Individual': u'abc', u'StatusAttribute': None, u'Modification Date': u'2016/11/01 16:50:48', u'Creation Date': u'2016/10/31 17:14:59', u'Ticket‑ID': 3529, u'Firstname+': u'abc', u'Priority': 3, u'Short Description+': u'xxxxxxxxxxxx', u'Ticket‑Typ': u'Incident', u'Department': u'dhdh', u'Remark 2': u'fff', u'Assigned To Group': u'abc'}]
我面臨以下錯誤
Traceback (most recent call last):
File "C:\Users\ssrujan\Desktop\CISM\CSVtoMongodb.py", line 119, in <module>
CSVtoMongodb.import_content(filename,Database,Collection)
File "C:\Users\ssrujan\Desktop\CISM\CSVtoMongodb.py", line 109, in import_content
db_cm.update_many({'Ticket‑ID' : d['Ticket‑ID']}, updatedata , upsert = True)
File "C:\Python27\lib\site‑packages\pymongo\collection.py", line 887, in update_many
common.validate_ok_for_update(update)
File "C:\Python27\lib\site‑packages\pymongo\common.py", line 413, in validate_ok_for_update
validate_is_mapping("update", update)
File "C:\Python27\lib\site‑packages\pymongo\common.py", line 389, in validate_is_mapping
"collections.Mapping" % (option,))
TypeError: update must be an instance of dict, bson.son.SON, or other type that inherits from collections.Mapping
我是mongodb的新手,請幫忙。
參考解法
方法 1:
Instead of update_many
, use replace_one
:
db_cm.replace_one({'Ticket‑ID' : d['Ticket‑ID']}, updatedata , upsert=True)
For more info consult the replace_one documentation.
(by srujana、A. Jesse Jiryu Davis)