I've now tried everything for the past few hours but I can't extract a specific thing from the HTML below. I want to grab the "sessionCartId" but I can't figure out how....
This is what I have tried so far:
# Parse the response and pick the third <script type="text/javascript"> tag by position.
sessioncartid = BeautifulSoup(response.text, "html.parser").findAll("script", {"type":"text/javascript"})[2]
# The tag's text is JavaScript source ("var ACC = ...; var OCC = {...};"), not a
# bare JSON document, so json.loads() cannot parse it as-is.
data = json.loads(sessioncartid.text)
print(data)
^^ This gives me the correct script tag, but I can't parse it as JSON or get the sessionCartId from it.
<script type="text/javascript">
/*<![CDATA[*/
var ACC = {config: {}};
ACC.config.contextPath = "";
ACC.config.encodedContextPath = "/de/web";
ACC.config.commonResourcePath = "/_ui/20220811221438/responsive/common";
ACC.config.themeResourcePath = "/_ui/20220811221438/responsive/theme-gh";
ACC.config.siteResourcePath = "/_ui/20220811221438/responsive/site-ghstore";
ACC.config.rootPath = "/_ui/20220811221438/responsive";
ACC.config.CSRFToken = "81b0156a-5a78-4969-b52e-e5080473fb83";
ACC.pwdStrengthVeryWeak = 'password.strength.veryweak';
ACC.pwdStrengthWeak = 'password.strength.weak';
ACC.pwdStrengthMedium = 'password.strength.medium';
ACC.pwdStrengthStrong = 'password.strength.strong';
ACC.pwdStrengthVeryStrong = 'password.strength.verystrong';
ACC.pwdStrengthUnsafePwd = 'password.strength.unsafepwd';
ACC.pwdStrengthTooShortPwd = 'password.strength.tooshortpwd';
ACC.pwdStrengthMinCharText = 'password.strength.minchartext';
ACC.accessibilityLoading = 'aria.pickupinstore.loading';
ACC.accessibilityStoresLoaded = 'aria.pickupinstore.storesloaded';
ACC.config.googleApiKey = "";
ACC.config.googleApiVersion = "3.7";
ACC.autocompleteUrl = '/de/web/search/autocompleteSecure';
ACC.config.loginUrl = '/de/web/login';
ACC.config.authenticationStatusUrl = '/de/web/authentication/status';
/*]]>*/
var OCC =
{
"token": "1799248c-8de0-4199-b5fe-1d610452010a",
"currentUser": "[email protected]",
"sessionCartGuid": "2323121232323",
"sessionCartId": "121212123435324",
"sessionLanguageIso": "de",
"sessionCountryIso": "DE",
"urlPosCode": "web",
"isASM": false,
"intermediaryID": "",
"isASMCustomerEmulated": false,
"siteId": "ghstore",
"OCCBaseUrl": "/ghcommercewebservices/v2/ghstore",
"availablePointsOfService": "BUD,FRA,DTM,HAM,GRZ,HAJ,SZG,VIE,WEB,BER",
"primaryPointOfSevice": "WEB",
"clientChannel": "web-eu"
};
</script>
CodePudding user response:
This is how you can extract that dictionary:
from bs4 import BeautifulSoup
import json
import re
html = '''
<script type="text/javascript">
/*<![CDATA[*/
var ACC = {config: {}};
ACC.config.contextPath = "";
ACC.config.encodedContextPath = "/de/web";
ACC.config.commonResourcePath = "/_ui/20220811221438/responsive/common";
ACC.config.themeResourcePath = "/_ui/20220811221438/responsive/theme-gh";
ACC.config.siteResourcePath = "/_ui/20220811221438/responsive/site-ghstore";
ACC.config.rootPath = "/_ui/20220811221438/responsive";
ACC.config.CSRFToken = "81b0156a-5a78-4969-b52e-e5080473fb83";
ACC.pwdStrengthVeryWeak = 'password.strength.veryweak';
ACC.pwdStrengthWeak = 'password.strength.weak';
ACC.pwdStrengthMedium = 'password.strength.medium';
ACC.pwdStrengthStrong = 'password.strength.strong';
ACC.pwdStrengthVeryStrong = 'password.strength.verystrong';
ACC.pwdStrengthUnsafePwd = 'password.strength.unsafepwd';
ACC.pwdStrengthTooShortPwd = 'password.strength.tooshortpwd';
ACC.pwdStrengthMinCharText = 'password.strength.minchartext';
ACC.accessibilityLoading = 'aria.pickupinstore.loading';
ACC.accessibilityStoresLoaded = 'aria.pickupinstore.storesloaded';
ACC.config.googleApiKey = "";
ACC.config.googleApiVersion = "3.7";
ACC.autocompleteUrl = '/de/web/search/autocompleteSecure';
ACC.config.loginUrl = '/de/web/login';
ACC.config.authenticationStatusUrl = '/de/web/authentication/status';
/*]]>*/
var OCC =
{
"token": "1799248c-8de0-4199-b5fe-1d610452010a",
"currentUser": "[email protected]",
"sessionCartGuid": "2323121232323",
"sessionCartId": "121212123435324",
"sessionLanguageIso": "de",
"sessionCountryIso": "DE",
"urlPosCode": "web",
"isASM": false,
"intermediaryID": "",
"isASMCustomerEmulated": false,
"siteId": "ghstore",
"OCCBaseUrl": "/ghcommercewebservices/v2/ghstore",
"availablePointsOfService": "BUD,FRA,DTM,HAM,GRZ,HAJ,SZG,VIE,WEB,BER",
"primaryPointOfSevice": "WEB",
"clientChannel": "web-eu"
};
</script>
'''
soup = BeautifulSoup(html, 'html.parser')
# Use find() here: select_one() only takes a CSS selector and does not apply a
# `string=` filter, so it would simply return the first <script> in the page.
info = soup.find('script', string=re.compile('sessionCartGuid'))
if info is None:
    raise ValueError('no <script> tag containing "sessionCartGuid" was found')
# Keep only the text after the assignment, then let the JSON decoder read
# exactly one object. raw_decode() stops at the closing brace, so the trailing
# ";" (or a ";" inside one of the values) cannot truncate the JSON the way
# splitting on ";" could.
_, _, occ_source = info.string.partition('var OCC =')
json_obj, _ = json.JSONDecoder().raw_decode(occ_source.lstrip())
# print(json_obj)
print(json_obj['token'])
print(json_obj['currentUser'])
print(json_obj['sessionCartId'])
Result:
1799248c-8de0-4199-b5fe-1d610452010a
[email protected]
121212123435324
BeautifulSoup docs: https://beautiful-soup-4.readthedocs.io/en/latest/index.html