How do you take an email_list with emails in the format first.last@domain.com and append the unique names to a new update_list? I would then take this update_list and convert it to CamelCase, but I'm not sure how to take only part of each element to search for duplicates. Is there some way to use regex? I keep getting `TypeError: expected string or bytes-like object`.
import re
# Question script (as posted). It reads "first, last, ID, domain" records from
# stdin, formats each as an e-mail address, and then attempts to de-duplicate
# the addresses and derive CamelCase names.
# NOTE(review): the indentation was lost when this was pasted, so the nesting
# of the statements below cannot be recovered from the text alone.
input_list = []
email_list = []
dup_email_list = []
domain_gmail = []
domain_outlook = []
dup_domain_gmail = []
dup_domain_outlook = []
update_list = []
camel_list = []
# NOTE(review): n is only ever assigned 0 or 5 in this script; it is never
# incremented, so it does not count entries.
n = 0
while n < 5:
input_list = []
email = []
# input_string split by ','; ignores whitespace
input_string = input('enter first, last name, ID and email domain: ')
if input_string == 'done':
n=5
break
else:
input_list = [x.strip() for x in input_string.split(',')]
print(input_list)
# convert input_list into email format first.last@domain.com
# (fields 0, 1 and 3 are used; field 2 is the ID)
email = "{0}.{1}@{3}.com".format(*input_list)
# convert email to lowercase
# NOTE(review): email_lower is never read afterwards; the un-lowered `email`
# is what gets printed and appended below.
email_lower =email.lower()
print(email)
# check ID validity (9 digits)
if input_list[2].isdigit() and len(input_list[2]) == 9:
print('valid ID')
# NOTE(review): presumably this `continue` belongs to the valid-ID branch, in
# which case the domain check below is skipped for valid IDs -- confirm.
continue
else:
print('invalid ID')
n = 0
# check domain validity (gmail or outlook)
if input_list[3] == 'gmail':
email_list.append(email)
domain_gmail.append(email)
n = 0
elif input_list[3] == 'outlook':
email_list.append(email)
domain_outlook.append(email)
n = 0
else:
print('invalid domain!')
n = 0
if n == 5:
# append unique email_list indexes to dup_email_list
for x in email_list:
if x not in dup_email_list:
dup_email_list.append(x)
# append unique emails from domain_gmail to new list
for x in domain_gmail:
if x not in dup_domain_gmail:
dup_domain_gmail.append(x)
# append unique emails from domain_outlook to new list
for x in domain_outlook:
if x not in dup_domain_outlook:
dup_domain_outlook.append(x)
# append dup_email_list to update_list
# NOTE(review): re.match is given the whole list dup_email_list rather than
# the loop variable `string`; re.match requires a str/bytes subject, which is
# the TypeError quoted in the question. The result is also assigned to
# update_list instead of being appended to it.
for string in dup_email_list:
update_list = re.match(r'[a-z]{1}[.]{1}[a-z]{1}', dup_email_list)
# append names from update_list to camel_list in CamelCase format FirstLast
# NOTE(review): `i` is never assigned anywhere in this script, and
# `len.update_list[i]` is an attribute lookup on the builtin `len`, not a call.
for x in dup_email_list:
while i < len.update_list[i]:
# NOTE(review): re.split is likewise handed the list, not a single string.
camel_list = re.split(r'[a-z]{1}[.]{1}[a-z]{1}', dup_email_list)
# print cases
print('mail list: ', dup_email_list)
print('After grouping: ', dup_domain_gmail, dup_domain_outlook)
print('After updating: ', update_list)
print('CamelCase list: ', camel_list)
CodePudding user response:
This should work... I actually didn't end up using regex at all and just stuck to string methods to split the email address and convert it to camel case...
I also did some minor cleanup throughout the rest of your script, mostly just removing unnecessary lines.
# Interactive script: read "first, last, ID, domain" records until the user
# types 'done', keep the ones with a 9-digit ID and a supported domain, then
# print the unique e-mail addresses, the per-domain groups, and a CamelCase
# (FirstLast@domain.com) version of each unique address.

VALID_DOMAINS = ('gmail', 'outlook')


def unique_in_order(items):
    """Return the distinct values of *items*, keeping first-seen order."""
    return list(dict.fromkeys(items))


def camel_case_email(email):
    """Convert ``first.last@domain.com`` to ``FirstLast@domain.com``.

    Every dot-separated part of the local name is title-cased and the parts
    are joined without a separator; the domain is left untouched.
    """
    name, domain = email.split('@')
    camel_name = ''.join(part.title() for part in name.split('.'))
    return '@'.join([camel_name, domain])


if __name__ == '__main__':
    email_list = []
    domain_gmail = []
    domain_outlook = []

    while True:
        # input_string is split on ','; whitespace around each field is ignored
        input_string = input('enter first, last name, ID and email domain: ')
        if input_string == 'done':
            break
        input_list = [x.strip() for x in input_string.split(',')]
        print(input_list)

        # Fewer than four fields would otherwise fail with IndexError below.
        if len(input_list) < 4:
            print('invalid input: expected 4 comma-separated fields')
            continue
        first, last, user_id, domain = input_list[:4]

        # Build the address in lowercase so that de-duplication does not
        # treat "Sam.Smith@..." and "sam.smith@..." as different people.
        email = f"{first}.{last}@{domain}.com".lower()
        print(email)

        # check ID validity (9 digits)
        if user_id.isdigit() and len(user_id) == 9:
            print('valid ID')
        else:
            print('invalid ID')
            continue

        # check domain validity (gmail or outlook)
        if domain not in VALID_DOMAINS:
            print('invalid domain!')
            continue

        email_list.append(email)
        if domain == 'gmail':
            domain_gmail.append(email)
        else:
            domain_outlook.append(email)

    # unique addresses overall and per domain, in the order first entered
    dup_email_list = unique_in_order(email_list)
    dup_domain_gmail = unique_in_order(domain_gmail)
    dup_domain_outlook = unique_in_order(domain_outlook)

    update_list = list(dup_email_list)
    # names from update_list in CamelCase format FirstLast
    camel_list = [camel_case_email(x) for x in update_list]

    # print cases
    print('mail list: ', dup_email_list)
    print('After grouping: ', dup_domain_gmail, dup_domain_outlook)
    print('After updating: ', update_list)
    print('CamelCase list: ', camel_list)
CodePudding user response:
Having the script as one big function makes it hard to test and debug.
I would suggest you break it up into smaller functions that are easier to test. Have small functions that process the data. Then have a main function that has the business logic of the script.
I would also use a Python dataclass to store the information rather than lists. This will make it a little more readable. That dataclass will also become the single source of truth. Then to obtain the other lists, have functions that extract the required information.
To get a unique list of strings, Python's built-in set type can be used;
a set discards duplicate values automatically.
An example of what this might look like is:
from dataclasses import dataclass
@dataclass
class UserInfo:
    """One person's record as entered at the prompt.

    ``email`` is derived data: whatever value is passed for it is replaced
    in ``__post_init__`` by ``first.last@domain.com``, case-folded.
    """

    first_name: str
    last_name: str
    user_id: int
    domain: str
    email: str = ''

    def __post_init__(self):
        # Assemble the address from the name and domain fields, then
        # case-fold the whole thing.
        address = '{}.{}@{}.com'.format(
            self.first_name, self.last_name, self.domain
        )
        self.email = address.casefold()
def check_id_valid(user_info):
    """Report whether ``user_info.user_id`` is exactly nine digits.

    Prints an explanatory message and returns ``False`` when it is not;
    returns ``True`` otherwise.
    """
    id_text = str(user_info.user_id)
    # Check for digits explicitly, not just length: a negative integer such
    # as -12345678 also stringifies to nine characters.
    if len(id_text) == 9 and id_text.isascii() and id_text.isdigit():
        return True
    print("User ID needs to be 9 digits")
    return False
def check_domain_valid(user_info):
    """Report whether ``user_info.domain`` is 'gmail' or 'outlook'.

    Prints a message naming the rejected domain and returns ``False`` when
    it is neither; returns ``True`` otherwise.
    """
    domain = user_info.domain
    is_known = domain in ['gmail', 'outlook']
    if not is_known:
        print(f'{domain} is not a valid domain')
    return is_known
def user_input():
    """Prompt until a valid record is entered or the user types ``done``.

    Returns:
        A ``UserInfo`` built from the four comma-separated fields once both
        the ID check and the domain check pass, or ``None`` when ``done``
        (compared case-insensitively) is entered.
    """
    # Keep looping, asking for information, until it is in the correct
    # format or `done` is entered.
    while True:
        input_string = input('enter first, last name, ID and email domain: ')
        if input_string.casefold() == 'done':
            return None
        input_list = [field.strip() for field in input_string.split(',')]
        # isdecimal(), not isdigit(): isdigit() also accepts characters such
        # as superscript digits, which int() rejects with ValueError.
        if len(input_list) == 4 and input_list[2].isdecimal():
            first_name, last_name, raw_id, domain = input_list
            user_info = UserInfo(first_name=first_name,
                                 last_name=last_name,
                                 user_id=int(raw_id),
                                 domain=domain)
            # The tuple is built before all() runs, so both checks always
            # execute and each can print its own message.
            if all((check_id_valid(user_info), check_domain_valid(user_info))):
                return user_info
        print('Enter the four fields separated by commas (,)')
def get_unique_emails(group_info):
    """Return the set of distinct ``email`` values found in *group_info*."""
    return {member.email for member in group_info}
def get_domain(domain_name, group_info):
    """Return the set of e-mails of records whose ``domain`` equals *domain_name*."""
    return {
        member.email
        for member in group_info
        if domain_name == member.domain
    }
def create_camelcase_email(group_info):
    """Return the set of ``FirstLast@domain.com`` addresses for *group_info*.

    The first and last names are each title-cased and joined without a
    separator; the domain is used as stored.
    """
    return {
        '{}{}@{}.com'.format(
            member.first_name.title(),
            member.last_name.title(),
            member.domain,
        )
        for member in group_info
    }
def main():
    """Collect up to five records, then print the e-mail summaries.

    Collection stops early when ``user_input()`` returns a falsy value.
    Afterwards the unique addresses, the non-empty per-domain groups and the
    CamelCase addresses are printed.
    """
    group_info = []
    while len(group_info) < 5:
        entry = user_input()
        if not entry:
            break
        group_info.append(entry)

    print('mail list:', get_unique_emails(group_info))
    for domain in ['gmail', 'outlook']:
        emails = get_domain(domain, group_info)
        if not emails:
            # Domains with no addresses are left out of the report.
            continue
        print(f'Group {domain}: {emails}')
    print('CamelCase list: ', create_camelcase_email(group_info))


if __name__ == '__main__':
    main()
Below is a transcript from a test I did:
enter first, last name, ID and email domain: sam, smith, 123456789, outlook
enter first, last name, ID and email domain: sam, smith, 123456789, outlook
enter first, last name, ID and email domain: sam, smith, 123456789, gmail
enter first, last name, ID and email domain: sally, smith, 123456789, gmail
enter first, last name, ID and email domain: sally, smith, 12345, gmail
User ID needs to be 9 digits
Enter the four fields separated by commas (,)
enter first, last name, ID and email domain: sally, smith, 123456789, gmail
mail list: {'sam.smith@outlook.com', 'sam.smith@gmail.com', 'sally.smith@gmail.com'}
Group gmail: {'sam.smith@gmail.com', 'sally.smith@gmail.com'}
Group outlook: {'sam.smith@outlook.com'}
CamelCase list: {'SamSmith@outlook.com', 'SamSmith@gmail.com', 'SallySmith@gmail.com'}