Univention Bugzilla – Attachment 8069 Details for
Bug 41882
ucs-school-import: lower memory usage of import
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
massively lower memory footprint of user import
41882_lower_memory_usage_of_user-import.patch (text/plain), 14.69 KB, created by
Daniel Tröder
on 2016-10-05 20:16:56 CEST
(
hide
)
Description:
massively lower memory footprint of user import
Filename:
MIME Type:
Creator:
Daniel Tröder
Created:
2016-10-05 20:16:56 CEST
Size:
14.69 KB
patch
obsolete
>Index: modules/ucsschool/importer/mass_import/mass_import.py >=================================================================== >--- modules/ucsschool/importer/mass_import/mass_import.py (Revision 72949) >+++ modules/ucsschool/importer/mass_import/mass_import.py (Arbeitskopie) >@@ -31,6 +31,7 @@ > # /usr/share/common-licenses/AGPL-3; if not, see > # <http://www.gnu.org/licenses/>. > >+import sys > import datetime > > from ucsschool.importer.factory import Factory >@@ -37,6 +38,7 @@ > from ucsschool.importer.configuration import Configuration > from ucsschool.importer.utils.logging import get_logger > from ucsschool.lib.models.utils import stopped_notifier >+from memory_profiler import profile, LogFile > > > class MassImport(object): >@@ -56,6 +58,7 @@ > self.result_exporter = self.factory.make_result_exporter() > self.password_exporter = self.factory.make_password_exporter() > self.errors = list() >+ sys.stdout = LogFile(self.logger.name, reportIncrementFlag=False) > > def mass_import(self): > with stopped_notifier(): >@@ -89,6 +92,7 @@ > def import_routers(self): > pass > >+ @profile(precision=3) > def import_users(self): > self.logger.info("------ Importing users... ------") > user_import = self.factory.make_user_importer(self.dry_run) >@@ -98,7 +102,7 @@ > user_import.create_and_modify_users(imported_users) > self.errors.extend(user_import.errors) > user_import.log_stats() >- if self.config["output"]["new_user_passwords"]: >+ if False and self.config["output"]["new_user_passwords"]: # TODO > nup = datetime.datetime.now().strftime(self.config["output"]["new_user_passwords"]) > self.logger.info("------ Writing new users passwords to %s... ------", nup) > self.password_exporter.dump(user_import, nup) >Index: modules/ucsschool/importer/mass_import/user_import.py >=================================================================== >--- modules/ucsschool/importer/mass_import/user_import.py (Revision 72949) >+++ modules/ucsschool/importer/mass_import/user_import.py (Arbeitskopie) >@@ -43,6 +43,7 @@ > from ucsschool.importer.configuration import Configuration > from ucsschool.importer.utils.logging import get_logger > from ucsschool.importer.utils.ldap_connection import get_admin_connection >+from memory_profiler import profile, LogFile > > > class UserImport(object): >@@ -61,7 +62,9 @@ > self.connection, self.position = get_admin_connection() > self.factory = Factory() > self.reader = self.factory.make_reader() >+ sys.stdout = LogFile(self.logger.name, reportIncrementFlag=False) > >+ @profile(precision=3) > def read_input(self): > """ > Read users from input data. >@@ -86,6 +89,7 @@ > self.logger.info("------ Read %d users from input data. ------", len(self.imported_users)) > return self.imported_users > >+ @profile(precision=3) > def create_and_modify_users(self, imported_users=None): > """ > Create and modify users. >@@ -98,11 +102,15 @@ > :return tuple: (self.errors, self.added_users, self.modified_users) > """ > self.logger.info("------ Creating / modifying users... ------") >- for imported_user in imported_users: >+ usernum = 0 >+ total = len(imported_users) >+ while imported_users: >+ imported_user = imported_users.pop(0) >+ usernum += 1 > if imported_user.action == "D": > continue > try: >- self.logger.debug("Creating / modifying user %s...", imported_user) >+ self.logger.debug("Creating / modifying user %d/%d %s...", usernum, total, imported_user) > user = self.determine_add_modify_action(imported_user) > cls_name = user.__class__.__name__ > >@@ -146,14 +154,12 @@ > user.record_uid, exc), validation_error=exc, import_user=user), sys.exc_info()[2] > > if success: >- self.logger.info("Success %s %s (source_uid:%s record_uid: %s).", action_str.lower(), user, >- user.source_uid, user.record_uid) >- store.append(user) >+ self.logger.info("Success %s %d/%d %s (source_uid:%s record_uid: %s).", action_str.lower(), usernum, total, user, user.source_uid, user.record_uid) >+ store.append(user.to_dict_all()) > else: >- raise err("Error {} {} (source_uid:{} record_uid: {}), does probably {}exist.".format( >- action_str.lower(), user, user.source_uid, user.record_uid, >+ raise err("Error {} {}/{} {} (source_uid:{} record_uid: {}), does probably {}exist.".format( >+ action_str.lower(), usernum, len(imported_users), user, user.source_uid, user.record_uid, > "not " if user.action == "M" else "already "), entry=user.entry_count, import_user=user) >- > except (CreationError, ModificationError) as exc: > self.logger.error("Entry #%d: %s", exc.entry, exc) # traceback useless > self._add_error(exc) >@@ -197,6 +203,7 @@ > user.action = "A" > return user > >+ @profile(precision=3) > def detect_users_to_delete(self): > """ > Find difference between source database and UCS user database. >@@ -205,7 +212,6 @@ > """ > self.logger.info("------ Detecting which users to delete... ------") > users_to_delete = list() >- a_user = self.factory.make_import_user([]) > > if self.config["no_delete"]: > self.logger.info("------ Looking only for users with action='D' (no_delete=%r) ------", >@@ -213,9 +219,7 @@ > for user in self.imported_users: > if user.action == "D": > try: >- ldap_user = a_user.get_by_import_id(self.connection, user.source_uid, user.record_uid) >- ldap_user.update(user) # need user.input_data for hooks >- users_to_delete.append(ldap_user) >+ users_to_delete.append((user.source_uid, user.record_uid)) > except noObject: > msg = "User to delete not found in LDAP: {}.".format(user) > self.logger.error(msg) >@@ -235,19 +239,11 @@ > ucs_ldap_users = self.connection.search(filter_s, attr=attr) > ucs_user_ids = set([(lu[1]["ucsschoolSourceUID"][0], lu[1]["ucsschoolRecordUID"][0]) for lu in ucs_ldap_users]) > >- # collect ucschool objects for those users to delete in imported_users >- for ucs_id_not_in_import in (ucs_user_ids - imported_user_ids): >- try: >- ldap_user = a_user.get_by_import_id(self.connection, ucs_id_not_in_import[0], ucs_id_not_in_import[1]) >- ldap_user.action = "D" # mark for logging/csv-output purposes >- users_to_delete.append(ldap_user) >- except noObject as exc: >- self.logger.error("Cannot delete non existing user with source_uid=%r, record_uid=%r: %s", >- ucs_id_not_in_import[0], ucs_id_not_in_import[1], exc) >- >+ users_to_delete = list(ucs_user_ids - imported_user_ids) > self.logger.debug("users_to_delete=%r", users_to_delete) > return users_to_delete > >+ @profile(precision=3) > def delete_users(self, users=None): > """ > Delete users. >@@ -261,17 +257,25 @@ > :return: tuple: (self.errors, self.deleted_users) > """ > self.logger.info("------ Deleting %d users... ------", len(users)) >- for user in users: >+ a_user = self.factory.make_import_user([]) >+ for num, ucs_id_not_in_import in enumerate(users): > try: >+ user = a_user.get_by_import_id(self.connection, ucs_id_not_in_import[0], ucs_id_not_in_import[1]) >+ user.action = "D" # mark for logging/csv-output purposes >+ except noObject as exc: >+ self.logger.error("Cannot delete non existing user with source_uid=%r, record_uid=%r: %s", >+ ucs_id_not_in_import[0], ucs_id_not_in_import[1], exc) >+ continue >+ try: > success = self.do_delete(user) > if success: >- self.logger.info("Success deleting user %r (source_uid:%s record_uid: %s).", user.name, >- user.source_uid, user.record_uid) >+ self.logger.info("Success deleting %d/%d %r (source_uid:%s record_uid: %s).", num, len(users), >+ user.name, user.source_uid, user.record_uid) > else: > raise DeletionError("Error deleting user '{}' (source_uid:{} record_uid: {}), has probably already " > "been deleted.".format(user.name, user.source_uid, user.record_uid), entry=user.entry_count, > import_user=user) >- self.deleted_users[user.__class__.__name__].append(user) >+ self.deleted_users[user.__class__.__name__].append(user.to_dict_all()) > except UcsSchoolImportError as exc: > self.logger.exception("Error in entry #%d: %s", exc.entry, exc) > self._add_error(exc) >@@ -352,6 +356,7 @@ > raise UnknownDeleteSetting("Don't know what to do with user_deletion=%r and expiration=%r.".format( > self.config["user_deletion"]["delete"], self.config["user_deletion"]["expiration"]), > entry=user.entry_count, import_user=user) >+ user.invalidate_all_caches() > return success > > def log_stats(self): >@@ -368,13 +373,13 @@ > for cls_name in sorted(cls_names): > self.logger.info("Created %s: %d", cls_name, len(self.added_users.get(cls_name, []))) > for i in range(0, len(self.added_users[cls_name]), columns): >- self.logger.info(" %s", [iu.name for iu in self.added_users[cls_name][i:i+columns]]) >+ self.logger.info(" %s", [iu["name"] for iu in self.added_users[cls_name][i:i+columns]]) > self.logger.info("Modified %s: %d", cls_name, len(self.modified_users.get(cls_name, []))) > for i in range(0, len(self.modified_users[cls_name]), columns): >- self.logger.info(" %s", [iu.name for iu in self.modified_users[cls_name][i:i+columns]]) >+ self.logger.info(" %s", [iu["name"] for iu in self.modified_users[cls_name][i:i+columns]]) > self.logger.info("Deleted %s: %d", cls_name, len(self.deleted_users.get(cls_name, []))) > for i in range(0, len(self.deleted_users[cls_name]), columns): >- self.logger.info(" %s", [iu.name for iu in self.deleted_users[cls_name][i:i+columns]]) >+ self.logger.info(" %s", [iu["name"] for iu in self.deleted_users[cls_name][i:i+columns]]) > self.logger.info("Errors: %d", len(self.errors)) > if self.errors: > self.logger.info("Entry #: Error description") >Index: modules/ucsschool/importer/models/import_user.py >=================================================================== >--- modules/ucsschool/importer/models/import_user.py (Revision 72949) >+++ modules/ucsschool/importer/models/import_user.py (Arbeitskopie) >@@ -80,6 +80,8 @@ > reader = None > logger = None > _pyhook_cache = None >+ _additional_props = ("action", "entry_count", "udm_properties", "input_data", "old_user", "in_hook") >+ prop = uadmin_property("_replace") > > def __init__(self, name=None, school=None, **kwargs): > self.action = None # "A", "D" or "M" >@@ -88,13 +90,21 @@ > self.input_data = list() # raw input data created by SomeReader.read() > self.old_user = None # user in LDAP, when modifying > self.in_hook = False # if a hook is currently running >+ >+ for attr in self._additional_props: >+ try: >+ val = kwargs.pop(attr) >+ setattr(self, attr, val) >+ except KeyError: >+ pass >+ > if not self.factory: >- self.factory = Factory() >- self.ucr = self.factory.make_ucr() >- self.config = Configuration() >- self.reader = self.factory.make_reader() >- self.logger = get_logger() >- self.username_max_length = 20 - len(self.ucr.get("ucsschool/ldap/default/userprefix/exam", "exam-")) >+ self.__class__.factory = Factory() >+ self.__class__.ucr = self.factory.make_ucr() >+ self.__class__.config = Configuration() >+ self.__class__.reader = self.factory.make_reader() >+ self.__class__.logger = get_logger() >+ self.__class__.username_max_length = 20 - len(self.ucr.get("ucsschool/ldap/default/userprefix/exam", "exam-")) > self._lo = None > self._userexpiry = None > super(ImportUser, self).__init__(name, school, **kwargs) >@@ -186,6 +196,24 @@ > """ > self._userexpiry = expiry > >+ @classmethod >+ def from_dict(cls, a_dict): >+ if "Staff" in a_dict["type_name"]: >+ if "Teacher" in a_dict["type_name"]: >+ kls = ImportTeachersAndStaff >+ else: >+ kls = ImportStaff >+ elif "Student" in a_dict["type_name"]: >+ kls = ImportStudent >+ elif "Teacher" in a_dict["type_name"]: >+ kls = ImportTeacher >+ else: >+ kls = cls >+ a_dict.pop("$dn$", None) >+ a_dict.pop("type", None) >+ a_dict.pop("type_name", None) >+ return kls(**a_dict) >+ > def _alter_udm_obj(self, udm_obj): > super(ImportUser, self)._alter_udm_obj(udm_obj) > if self._userexpiry is not None: >@@ -479,7 +507,7 @@ > raise FormatError("No username was created from scheme '{}'.".format( > self.username_scheme), self.username_scheme, self.to_dict()) > if not self.username_handler: >- self.username_handler = self.factory.make_username_handler(self.username_max_length) >+ self.__class__.username_handler = self.factory.make_username_handler(self.username_max_length) > self.name = self.username_handler.format_username(self.name) > > def modify(self, lo, validate=True, move_if_necessary=None): >@@ -507,8 +535,8 @@ > self._lo = lo > return super(ImportUser, self).move(lo, udm_obj, force) > >- @staticmethod >- def normalize(s): >+ @classmethod >+ def normalize(cls, s): > """ > Normalize string (german umlauts etc) > >@@ -516,8 +544,7 @@ > :return: str: normalized s > """ > if isinstance(s, basestring): >- prop = uadmin_property("_replace") >- s = prop._replace("<:umlauts>{}".format(s), {}) >+ s = cls.prop._replace("<:umlauts>{}".format(s), {}) > return s > > def normalize_udm_properties(self): >@@ -665,12 +692,11 @@ > all_fields = self.reader.get_data_mapping(self.input_data) > else: > all_fields = dict() >- all_fields.update(self.to_dict().copy()) >+ all_fields.update(self.to_dict()) > all_fields.update(self.udm_properties) > all_fields.update(kwargs) > >- prop = uadmin_property("_replace") >- res = prop._replace(scheme, all_fields) >+ res = self.prop._replace(scheme, all_fields) > if not res: > self.logger.warn("Created empty '{prop_name}' from scheme '{scheme}' and input data {data}. ".format( > prop_name=prop_name, scheme=scheme, data=all_fields)) >@@ -755,6 +781,13 @@ > raise UnknownProperty("UDM properties could not be set. Unknown property: '{}'".format(exc), > entry=self.entry_count, import_user = self) > >+ def to_dict_all(self): >+ res = self.to_dict() >+ for attr in self._additional_props: >+ res[attr] = getattr(self, attr) >+ self.logger.debug("res=%r", res) >+ return res >+ > def update(self, other): > """ > Copy attributes of other ImportUser into this one. >Index: modules/ucsschool/importer/writer/user_import_csv_result_exporter.py >=================================================================== >--- modules/ucsschool/importer/writer/user_import_csv_result_exporter.py (Revision 72949) >+++ modules/ucsschool/importer/writer/user_import_csv_result_exporter.py (Arbeitskopie) >@@ -72,7 +72,7 @@ > for users in [user_import.added_users, user_import.modified_users, user_import.deleted_users]: > tmp = list() > map(tmp.extend, [u for u in users.values() if u]) >- li.extend(sorted(tmp, key=lambda x: x.entry_count)) >+ li.extend(tmp) > return li > > def get_writer(self): >@@ -95,6 +95,9 @@ > user = obj > elif isinstance(obj, UcsSchoolImportError): > user = obj.import_user >+ elif isinstance(obj, dict): >+ user = self.factory.make_import_user([]).from_dict(obj) >+ obj = user > else: > raise TypeError("Expected ImportUser or UcsSchoolImportError, got {}. Repr: {}".format(type(obj), repr(obj))) > if not user:
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 41882
: 8069