I'm working on a Facebook scraper; however, I'm having difficulty handling the replies to the comments.
For the collection of comments, this is the code:
import pandas as pd
import facebook_scraper
# IDs of the Facebook posts whose comments should be collected.
post_ids = ['1014199301965488']

# Options handed to facebook_scraper.get_posts().
options = {
    "comments": True,
    "reactors": True,
    "allow_extra_requests": True,
}

# It is necessary to generate a Facebook cookie file beforehand.
cookies = "/content/cookies.txt"

# Accumulates every entry found under each post's 'comments_full' key.
replies = []
for post in facebook_scraper.get_posts(post_urls=post_ids, cookies=cookies, options=options):
    # NOTE(review): the sample output shown in the question consists of lists
    # of reply dicts, so each `p` is presumably such a list -- confirm against
    # the actual scraper output.
    for p in post['comments_full']:
        replies.append(p)
Basically, each comment can have more than one reply. From what I understand, the replies to a comment are stored as a list of dictionaries, one dictionary per reply. Here is an example of some replies.
[{'comment_id': '1014587065260045', 'comment_url': 'https://facebook.com/1014587065260045', 'commenter_id': '100002664042251', 'commenter_url': 'https://facebook.com/anderson.ritmoapoesia?fref=nf&rc=p&__tn__=R', 'commenter_name': 'Anderson Ritmoapoesia', 'commenter_meta': None, 'comment_text': 'Boa irmão!\nTmj', 'comment_time': datetime.datetime(2015, 8, 17, 0, 0), 'comment_image': 'https://scontent.xx.fbcdn.net/m1/v/t6/An_UvxJXg9tdnLU3Y5qjPi0200MLilhzPXUgxzGjQzUMaNcmjdZA6anyrngvkdub33NZzZhd51fpCAEzNHFhko5aKRFP5fS1w_lKwYrzcNLupv27.png?_nc_eui2=AeH0Z9O-PPSBg9l8FeLeTyUHMiCX3WNpzi0yIJfdY2nOLeM4yQsYnDi7Fo-bVaW2oRmOKEYPCsTFZnVoJbmO2yOH&ccb=10-5&oh=00_AT-4ep4a5bI4Gf173sbCjcAhS7gahF9vcYuM9GaQwJsI9g&oe=6301E8F9&_nc_sid=55e238', 'comment_reactors': [{'name': 'Marcio J J Tomaz', 'link': 'https://facebook.com/marcioroberto.rodriguestomaz?fref=pb', 'type': 'like'}], 'comment_reactions': {'like': 1}, 'comment_reaction_count': 1}]
[{'comment_id': '1014272461958172', 'comment_url': 'https://facebook.com/1014272461958172', 'commenter_id': '100009587231687', 'commenter_url': 'https://facebook.com/cassia.danyelle.94?fref=nf&rc=p&__tn__=R', 'commenter_name': 'Cassia Danyelle', 'commenter_meta': None, 'comment_text': 'Concordo!', 'comment_time': datetime.datetime(2015, 8, 17, 0, 0), 'comment_image': None, 'comment_reactors': [], 'comment_reactions': None, 'comment_reaction_count': None}, {'comment_id': '1014275711957847', 'comment_url': 'https://facebook.com/1014275711957847', 'commenter_id': '1227694094', 'commenter_url': 'https://facebook.com/marcusvinicius.espiritosanto?fref=nf&rc=p&__tn__=R', 'commenter_name': 'Marcus Vinicius Espirito Santo', 'commenter_meta': None, 'comment_text': 'Concordo Marcão a única observação que faço é: a justiça deveria funcionar sempre dessa forma rápida e precisa, como neste caso.', 'comment_time': datetime.datetime(2015, 8, 17, 0, 0), 'comment_image': 'https://scontent.xx.fbcdn.net/m1/v/t6/An_UvxJXg9tdnLU3Y5qjPi0200MLilhzPXUgxzGjQzUMaNcmjdZA6anyrngvkdub33NZzZhd51fpCAEzNHFhko5aKRFP5fS1w_lKwYrzcNLupv27.png?_nc_eui2=AeH0Z9O-PPSBg9l8FeLeTyUHMiCX3WNpzi0yIJfdY2nOLeM4yQsYnDi7Fo-bVaW2oRmOKEYPCsTFZnVoJbmO2yOH&ccb=10-5&oh=00_AT-4ep4a5bI4Gf173sbCjcAhS7gahF9vcYuM9GaQwJsI9g&oe=6301E8F9&_nc_sid=55e238', 'comment_reactors': [{'name': 'Marcos Alexandre de Souza', 'link': 'https://facebook.com/senseimarcos?fref=pb', 'type': 'like'}], 'comment_reactions': {'like': 1}, 'comment_reaction_count': 1}]
[{'comment_id': '1014367808615304', 'comment_url': 'https://facebook.com/1014367808615304', 'commenter_id': '100005145968202', 'commenter_url': 'https://facebook.com/flavioluis.schnurr?fref=nf&rc=p&__tn__=R', 'commenter_name': 'Flavio Luis Schnurr', 'commenter_meta': None, 'comment_text': 'E porque você não morre ! Quem apoia assassinos também é!', 'comment_time': datetime.datetime(2015, 8, 17, 0, 0), 'comment_image': None, 'comment_reactors': [], 'comment_reactions': None, 'comment_reaction_count': None}]
[{'comment_id': '1014222638629821', 'comment_url': 'https://facebook.com/1014222638629821', 'commenter_id': '100009383732423', 'commenter_url': 'https://facebook.com/profile.php?id=100009383732423&fref=nf&rc=p&__tn__=R', 'commenter_name': 'Anerol Ahnuc', 'commenter_meta': None, 'comment_text': 'Hã?', 'comment_time': datetime.datetime(2015, 8, 17, 0, 0), 'comment_image': 'https://scontent.xx.fbcdn.net/m1/v/t6/An_UvxJXg9tdnLU3Y5qjPi0200MLilhzPXUgxzGjQzUMaNcmjdZA6anyrngvkdub33NZzZhd51fpCAEzNHFhko5aKRFP5fS1w_lKwYrzcNLupv27.png?_nc_eui2=AeH0Z9O-PPSBg9l8FeLeTyUHMiCX3WNpzi0yIJfdY2nOLeM4yQsYnDi7Fo-bVaW2oRmOKEYPCsTFZnVoJbmO2yOH&ccb=10-5&oh=00_AT-4ep4a5bI4Gf173sbCjcAhS7gahF9vcYuM9GaQwJsI9g&oe=6301E8F9&_nc_sid=55e238', 'comment_reactors': [], 'comment_reactions': {'like': 1}, 'comment_reaction_count': 1}, {'comment_id': '1014236578628427', 'comment_url': 'https://facebook.com/1014236578628427', 'commenter_id': '100009383732423', 'commenter_url': 'https://facebook.com/profile.php?id=100009383732423&fref=nf&rc=p&__tn__=R', 'commenter_name': 'Anerol Ahnuc', 'commenter_meta': None, 'comment_text': 'Eu hein?', 'comment_time': datetime.datetime(2015, 8, 17, 0, 0), 'comment_image': None, 'comment_reactors': [], 'comment_reactions': None, 'comment_reaction_count': None}]
[{'comment_id': '1014435731941845', 'comment_url': 'https://facebook.com/1014435731941845', 'commenter_id': '100003779689547', 'commenter_url': 'https://facebook.com/marcia.pimentel.5454?fref=nf&rc=p&__tn__=R', 'commenter_name': 'Márcia Pimentel', 'commenter_meta': None, 'comment_text': 'Não é que sejam defensores Marcondes Martins,sim,eles falam que ele era um ser humano que errou e que podia ter pago de outra maneira,e não com a morte,porque só quem tem direito de tirar a vida das pessoas é Aquele que nos deu... Jesus.', 'comment_time': datetime.datetime(2015, 8, 17, 0, 0), 'comment_image': None, 'comment_reactors': [], 'comment_reactions': None, 'comment_reaction_count': None}, {'comment_id': '1014445965274155', 'comment_url': 'https://facebook.com/1014445965274155', 'commenter_id': '100000515531313', 'commenter_url': 'https://facebook.com/DJ.Marcondes.Martins?fref=nf&rc=p&__tn__=R', 'commenter_name': 'Marcondes Martins', 'commenter_meta': None, 'comment_text': 'Marcia Márcia Pimentel ta teoria é tudo bonitinho. Mas bandidos matam, estupram, humilham pessoas de bem e a justiça ainda protege esses vermes, a sociedade ja está cansada disso.', 'comment_time': datetime.datetime(2015, 8, 17, 0, 0), 'comment_image': None, 'comment_reactors': [], 'comment_reactions': None, 'comment_reaction_count': None}]
Based on the data above, I only need the values for 'comment_text'; however, I've never worked with this type of structure. Is it possible to extract every occurrence of 'comment_text'?
CodePudding user response:
Since you're working with a list of dictionaries, I would use a list comprehension to loop over the items in the list and extract only the key I want from each dictionary:
# Use this in place of `replies.append(p)` inside the inner loop: for each
# list of reply dicts `p`, append only the 'comment_text' value of every reply.
replies.append([reply['comment_text'] for reply in p])
Here is an example of what it would do:
# The sample data contains datetime.datetime(...) values, so the module must be
# imported for this example to run on its own.
import datetime

# One list of reply dicts, copied from the sample output in the question.
p = [
    {
        'comment_id': '1014272461958172',
        'comment_url': 'https://facebook.com/1014272461958172',
        'commenter_id': '100009587231687',
        'commenter_url': 'https://facebook.com/cassia.danyelle.94?fref=nf&rc=p&__tn__=R',
        'commenter_name': 'Cassia Danyelle',
        'commenter_meta': None,
        'comment_text': 'Concordo!',
        'comment_time': datetime.datetime(2015, 8, 17, 0, 0),
        'comment_image': None,
        'comment_reactors': [],
        'comment_reactions': None,
        'comment_reaction_count': None,
    },
    {
        'comment_id': '1014275711957847',
        'comment_url': 'https://facebook.com/1014275711957847',
        'commenter_id': '1227694094',
        'commenter_url': 'https://facebook.com/marcusvinicius.espiritosanto?fref=nf&rc=p&__tn__=R',
        'commenter_name': 'Marcus Vinicius Espirito Santo',
        'commenter_meta': None,
        'comment_text': 'Concordo Marcão a única observação que faço é: a justiça deveria funcionar sempre dessa forma rápida e precisa, como neste caso.',
        'comment_time': datetime.datetime(2015, 8, 17, 0, 0),
        'comment_image': 'https://scontent.xx.fbcdn.net/m1/v/t6/An_UvxJXg9tdnLU3Y5qjPi0200MLilhzPXUgxzGjQzUMaNcmjdZA6anyrngvkdub33NZzZhd51fpCAEzNHFhko5aKRFP5fS1w_lKwYrzcNLupv27.png?_nc_eui2=AeH0Z9O-PPSBg9l8FeLeTyUHMiCX3WNpzi0yIJfdY2nOLeM4yQsYnDi7Fo-bVaW2oRmOKEYPCsTFZnVoJbmO2yOH&ccb=10-5&oh=00_AT-4ep4a5bI4Gf173sbCjcAhS7gahF9vcYuM9GaQwJsI9g&oe=6301E8F9&_nc_sid=55e238',
        'comment_reactors': [
            {
                'name': 'Marcos Alexandre de Souza',
                'link': 'https://facebook.com/senseimarcos?fref=pb',
                'type': 'like',
            },
        ],
        'comment_reactions': {'like': 1},
        'comment_reaction_count': 1,
    },
]

# Keep only the text of each reply.
print([reply['comment_text'] for reply in p])  # ['Concordo!', 'Concordo Marcão a única observação que faço é: a justiça deveria funcionar sempre dessa forma rápida e precisa, como neste caso.']